diff --git data/files/student_2_lines data/files/student_2_lines new file mode 100644 index 0000000..9e86836 --- /dev/null +++ data/files/student_2_lines @@ -0,0 +1,2 @@ +tom thompson420.53 +luke king280.47 diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 80e6aee..1ce3ba6 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -731,6 +731,7 @@ minillaplocal.query.files=\ vector_number_compare_projection.q,\ vector_partitioned_date_time.q,\ vector_ptf_part_simple.q,\ + vector_udf_adaptor_1.q,\ vector_udf1.q,\ vector_windowing.q,\ vector_windowing_expressions.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index 56312d9..f9b3f76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -22,24 +22,34 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -public class IfExprColumnNull extends IfExprConditionalFilter { +public class IfExprColumnNull extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) { - super(arg1Column, arg2Column, -1, outputColumn); + private final int arg1Column; + private final int arg2Column; + + public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; } public IfExprColumnNull() { super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } + final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; final ColumnVector arg2ColVector = batch.cols[arg2Column]; final ColumnVector outputColVector = batch.cols[outputColumnNum]; @@ -94,4 +104,8 @@ public String vectorExpressionParameters() { return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + ", null"; } + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java deleted file mode 100644 index c17407e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.exec.vector.expressions; - - -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * For conditional expressions, the{@code IfExprConditionalFilter} class updated - * the selected array of batch parameter after the conditional expression is executed. - * Then the remaining expression will only do the selected rows instead of all. - */ -public class IfExprConditionalFilter extends VectorExpression { - protected int arg1Column = -1; - protected int arg2Column = -1; - protected int arg3Column = -1; - protected int arg2ColumnTmp = -1; - - public IfExprConditionalFilter(int arg1Column, int arg2Column, int arg3Column, - int outputColumnNum) { - super(outputColumnNum); - this.arg1Column = arg1Column; - if(arg2Column == -1){ - this.arg2Column = arg3Column; - this.arg2ColumnTmp = -1; - } else{ - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.arg2ColumnTmp = arg2Column; - } - } - - public IfExprConditionalFilter() { - super(); - } - - /** - * For If(expr1,expr2,expr3) expression, - * Firstly, save the previous selected vector, size and selectedInUse value of batch. - * Secondly evaluate the conditional expression and update the selected array of batch based - * on the result of conditional expression(1 denote done, 0 denote not done) - * Then evaluate the expr2 based on the updated selected. - * After the expr2 is executed, remove the indexes which have done in expr2. - * Last, evaluate the expr3 based on the updated selected. - * - * @param batch - * @param childExpressions the childExpressions need to be evaluated. - */ - public void evaluateIfConditionalExpr(VectorizedRowBatch batch, VectorExpression[] childExpressions) { - if (childExpressions != null) { - // Save the previous selected vector, size and selectedInUse value of batch. 
- int[] prevSelected = new int[batch.selected.length]; - int[] prevSelectedFalse = new int[batch.selected.length]; - int prevSize = batch.size; - boolean prevSelectInUse = batch.selectedInUse; - if (!batch.selectedInUse) { - for (int i = 0; i < batch.size; i++) { - prevSelected[i] = i; - } - System.arraycopy(batch.selected, 0, prevSelectedFalse, 0, batch.selected.length); - System.arraycopy(prevSelected, 0, batch.selected, 0, batch.size); - } else { - System.arraycopy(batch.selected, 0, prevSelected, 0, batch.selected.length); - } - - // Evaluate the conditional expression. - evaluateConditionalExpression(batch, childExpressions[0], - prevSize, prevSelectInUse); - if (childExpressions != null && childExpressions.length == 2) { - // If the length is 2, it has two situations:If(expr1,expr2,null) or - // If(expr1,null,expr3) distinguished by the indexes. - if (childExpressions[1].getOutputColumnNum() == arg2ColumnTmp) { - // Evaluate the expr2 expression. - childExpressions[1].evaluate(batch); - } else { - // Update the selected array of batch to remove the index of being done. - evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); - // If(expr1,null,expr3), if the expr1 is false, expr3 will be evaluated. - childExpressions[1].evaluate(batch); - } - } else if (childExpressions != null && childExpressions.length == 3) { - // IF(expr1,expr2,expr3). expr1,expr2,expr3 are all the expression. - // Evaluate the expr2 expression. - childExpressions[1].evaluate(batch); - // Update the selected array of batch to remove the index of being done. - evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize); - // Evaluate the expr3 expression. - childExpressions[2].evaluate(batch); - } - // When evaluate all the expressions, restore the previous selected - // vector,size and selectedInUse value of batch. 
- batch.size = prevSize; - batch.selectedInUse = prevSelectInUse; - if(!prevSelectInUse){ - batch.selected = prevSelectedFalse; - } else{ - batch.selected = prevSelected; - } - } - } - - - /** - * Update the selected array of batch based on the conditional expression - * result, remove the index of being done. - * - * @param batch - * @param num the column num of conditional expression in batch cols - * @param prevSelected the previous selected array - */ - private static void evaluateSelectedArray(VectorizedRowBatch batch, int num, - int[] prevSelected, int prevSize) { - // Get the result of conditional expression. - LongColumnVector outputColVector = (LongColumnVector) batch.cols[num]; - long[] flag = outputColVector.vector; - int newSize = 0; - // Update the selected array of batch - for (int j = 0; j < prevSize; j++) { - if (flag[prevSelected[j]] == 0) { - batch.selected[newSize++] = prevSelected[j]; - } - } - batch.size = newSize; - batch.selectedInUse = true; - } - - /** - * Evaluate the conditional expression and update the selected array of batch - * based on the result of conditional expression. - * - * @param batch - * @param ve the conditional expression need to evaluate - * @param prevSize the previous batch size - * @param prevSelectInUse the previous selectInUse - */ - private static void evaluateConditionalExpression(VectorizedRowBatch batch, - VectorExpression ve, int prevSize, - boolean prevSelectInUse) { - batch.size = prevSize; - batch.selectedInUse = prevSelectInUse; - int colNum = ve.getOutputColumnNum(); - // Evaluate the conditional expression. - ve.evaluate(batch); - LongColumnVector outputColVector = (LongColumnVector) batch.cols[colNum]; - long[] flag = outputColVector.vector; - int[] sel = batch.selected; - int newSize = 0; - // Update the selected array of the batch based on the conditional expression. 
- for (int j = 0; j < batch.size; j++) { - int k = sel[j]; - if (flag[k] == 1) { - sel[newSize++] = k; - } - } - if(newSize < batch.size ) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - throw new UnsupportedOperationException("Undefined descriptor"); - } - - @Override - public String vectorExpressionParameters() { - return null; - } -} - diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index d0a9785..e7d4e4d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -27,23 +27,36 @@ * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprDoubleColumnDoubleColumn extends IfExprConditionalFilter { +public class IfExprDoubleColumnDoubleColumn extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - super(arg1Column, arg2Column, arg3Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; } public IfExprDoubleColumnDoubleColumn() { super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; + arg3Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index 22a00f6..fa7b2da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -27,23 +27,36 @@ * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprIntervalDayTimeColumnColumn extends IfExprConditionalFilter { +public class IfExprIntervalDayTimeColumnColumn extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - super(arg1Column, arg2Column, arg3Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; } public IfExprIntervalDayTimeColumnColumn() { super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; + arg3Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 71346f0..0c8a2f6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -26,23 +26,36 @@ * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. */ -public class IfExprLongColumnLongColumn extends IfExprConditionalFilter { +public class IfExprLongColumnLongColumn extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - super(arg1Column, arg2Column, arg3Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; } public IfExprLongColumnLongColumn() { super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; + arg3Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 99185a0..85c37f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -22,23 +22,32 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -public class IfExprNullColumn extends IfExprConditionalFilter { +public class IfExprNullColumn extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) { - super(arg1Column, -1, arg2Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + + public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; } public IfExprNullColumn() { super(); + + // Dummy final assignments. 
+ arg1Column = -1; + arg2Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; @@ -95,4 +104,8 @@ public String vectorExpressionParameters() { return getColumnParamString(0, arg1Column) + ", null, col "+ arg2Column; } + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index 91c6c91..09aa9ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -29,23 +29,36 @@ * The first is always a boolean (LongColumnVector). * The second and third are string columns or string expression results. 
*/ -public class IfExprStringGroupColumnStringGroupColumn extends IfExprConditionalFilter { +public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - super(arg1Column, arg2Column, arg3Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; } public IfExprStringGroupColumnStringGroupColumn() { super(); + + // Dummy final assignments. + arg1Column = -1; + arg2Column = -1; + arg3Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index 690f04c..ee3cd19 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -26,22 +26,36 @@ * The first is always a boolean (LongColumnVector). * The second and third are long columns or long expression results. 
*/ -public abstract class IfExprTimestampColumnColumnBase extends IfExprConditionalFilter { +public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { private static final long serialVersionUID = 1L; - public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - super(arg1Column, arg2Column, arg3Column, outputColumn); + private final int arg1Column; + private final int arg2Column; + private final int arg3Column; + + public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; } public IfExprTimestampColumnColumnBase() { super(); + + // Dummy final assignments. + arg1Column = -1; + arg2Column = -1; + arg3Column = -1; } @Override public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { - super.evaluateIfConditionalExpr(batch, childExpressions); + super.evaluateChildren(batch); } LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 7326842..a1a1282 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -31,11 +31,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprConditionalFilter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; @@ -62,7 +60,6 @@ private String resultType; private VectorUDFArgDesc[] argDescs; private ExprNodeGenericFuncDesc expr; - private IfExprConditionalFilter cf; private transient GenericUDF genericUDF; private transient GenericUDF.DeferredObject[] deferredChildren; @@ -105,13 +102,6 @@ public void init() throws HiveException, UDFArgumentException { outputVectorAssignRow.init(outputTypeInfo, outputColumnNum); genericUDF.initialize(childrenOIs); - if((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName())){ - - // UNDONE: This kind of work should be done in VectorizationContext. - cf = new IfExprConditionalFilter - (argDescs[0].getColumnNum(), argDescs[1].getColumnNum(), - argDescs[2].getColumnNum(), outputColumnNum); - } // Initialize constant arguments for (int i = 0; i < argDescs.length; i++) { @@ -133,11 +123,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (childExpressions != null) { - if ((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName()) && cf != null) { - cf.evaluateIfConditionalExpr(batch, childExpressions); - } else { - super.evaluateChildren(batch); - } + super.evaluateChildren(batch); } int[] sel = batch.selected; diff --git ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q new file mode 100644 index 0000000..2eb0a0a --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q @@ -0,0 +1,27 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.stats.column.autogather=false; + +create table student_2_lines( +name string, +age int, +gpa double) +row format delimited +fields terminated by '\001' +stored as textfile; +LOAD DATA LOCAL 
INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines; +analyze table student_2_lines compute statistics; + +create table insert_10_1 (a float, b int, c timestamp, d binary); + +explain vectorization detail +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q index 3c48607..99d7cfc 100644 --- ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -79,19 +79,19 @@ CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC; INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0); --for length=3 -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr2 is null -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr3 is null -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; @@ -102,19 +102,19 @@ CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC; INSERT INTO test_2 VALUES (3,1),(2,2),(1,3); --for length=3 -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN 
attr+1 else attr+2 END FROM test_2; --for length=2 and the expression2 is null -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; --for length=2 and the expression3 is null -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out new file mode 100644 index 0000000..a752dfa --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: create table student_2_lines( +name string, +age int, +gpa double) +row format delimited +fields terminated by '\001' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: create table student_2_lines( +name string, +age int, +gpa double) +row format delimited +fields terminated by '\001' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_2_lines +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@student_2_lines +PREHOOK: query: analyze table student_2_lines compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@student_2_lines +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: analyze table student_2_lines compute statistics +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@student_2_lines +POSTHOOK: Output: default@student_2_lines +PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_10_1 +POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_10_1 +PREHOOK: query: explain vectorization detail +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_2_lines + Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 5, 
8] + selectExpressions: VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary + Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, bigint, string, string] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: 
query: insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_2_lines +PREHOOK: Output: default@insert_10_1 +POSTHOOK: query: insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_2_lines +POSTHOOK: Output: default@insert_10_1 +POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 8dcff32..f56d9ce 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -514,12 +514,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -534,12 +538,22 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -547,6 +561,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -565,12 +588,16 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: 
EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -585,12 +612,22 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -598,6 +635,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -616,12 +662,16 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 
PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -636,12 +686,22 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -649,6 +709,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -685,12 +754,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] 
POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -705,12 +778,22 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -718,6 +801,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
+ allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -736,12 +828,16 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -756,12 +852,22 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -769,6 +875,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -787,12 +902,16 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -807,12 +926,22 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -820,6 +949,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 583e902..c1dd74c 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -508,12 +508,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -528,18 +532,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE 
Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -558,12 +581,16 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -578,18 +605,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) 
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -608,12 +654,16 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -628,18 +678,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 
1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -676,12 +745,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -696,18 +769,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [6] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -726,12 +818,16 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -746,18 +842,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: 
bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -776,12 +891,16 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -796,18 +915,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr 
+ 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out new file mode 100644 index 0000000..6efcd8c --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: create table student_2_lines( +name string, +age int, +gpa double) +row format delimited +fields terminated by '\001' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: create table student_2_lines( +name string, +age int, +gpa double) +row format delimited +fields terminated by '\001' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@student_2_lines +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@student_2_lines +PREHOOK: query: analyze table student_2_lines compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@student_2_lines +PREHOOK: Output: default@student_2_lines +POSTHOOK: query: analyze table student_2_lines compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_2_lines +POSTHOOK: Output: default@student_2_lines +PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_10_1 +POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_10_1 +PREHOOK: query: explain vectorization detail +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, 
Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: student_2_lines + Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 5, 8] + selectExpressions: VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary + Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp, bigint, string, string] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_10_1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table insert_10_1 + select cast(gpa as float), + age, + IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_2_lines +PREHOOK: Output: default@insert_10_1 +POSTHOOK: query: insert overwrite table insert_10_1 + select cast(gpa as float), + age, + 
IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), + IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_2_lines +POSTHOOK: Output: default@insert_10_1 +POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 0489d72..50e9b0e 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -466,12 +466,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -483,18 +487,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr 
+ 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -513,12 +536,16 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -530,18 +557,37 @@ STAGE PLANS: TableScan alias: test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -560,12 +606,16 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -577,18 +627,37 @@ STAGE PLANS: TableScan alias: 
test_1 Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -625,12 +694,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -642,18 +715,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [6] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -672,12 +764,16 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE 
WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -689,18 +785,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -719,12 +834,16 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN 
VECTORIZATION EXPRESSION SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -736,18 +855,37 @@ STAGE PLANS: TableScan alias: test_2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator