diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1e322b8..edea129 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2978,7 +2978,12 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Which vectorized input format support features are enabled for vectorization.\n" + "That is, if a VectorizedInputFormat input format does support \"decimal_64\" for example\n" + "this variable must enable that to be used in vectorization"), - + HIVE_VECTORIZED_IF_EXPR_MODE("hive.vectorized.if.expr.mode", "better", new StringSet("adaptor", "good", "better"), + "Specifies the extent to which SQL IF statements will be vectorized.\n" + + "0. adaptor: only use the VectorUDFAdaptor to vectorize IF statements\n" + + "1. good : use regular vectorized IF expression classes that get good performance\n" + + "2. better : use vectorized IF expression classes that conditionally execute THEN/ELSE\n" + + " expressions for better performance.\n"), HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE("hive.test.vectorized.execution.enabled.override", "none", new StringSet("none", "enable", "disable"), "internal use only, used to override the hive.vectorized.execution.enabled setting and\n" + diff --git data/files/student_10_lines data/files/student_10_lines new file mode 100644 index 0000000..2f1b331 --- /dev/null +++ data/files/student_10_lines @@ -0,0 +1,10 @@ +tom thompson420.53 +luke king280.47 +priscilla falkner551.16 +luke brown601.14 +ulysses garcia352.74 +calvin brown282.70 +oscar thompson352.98 +xavier garcia331.06 +nick johnson34 +quinn ovid19 diff --git data/files/student_2_lines data/files/student_2_lines deleted file mode 100644 index 9e86836..0000000 --- data/files/student_2_lines +++ /dev/null @@ -1,2 +0,0 @@ -tom thompson420.53 -luke king280.47 diff --git 
itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 2776fe9..6e4e7f9 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -314,6 +314,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_binary_join_groupby.q,\ vector_bround.q,\ vector_bucket.q,\ + vector_case_when_1.q,\ vector_cast_constant.q,\ vector_char_2.q,\ vector_char_4.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 4df6e97..93212ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -133,6 +133,21 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { } private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + + public enum HiveVectorIfStmtMode { + ADAPTOR, + GOOD, + BETTER; + + public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) { + String string = HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE); + return valueOf(string.toUpperCase()); + } + } + + private HiveVectorIfStmtMode hiveVectorIfStmtMode; + //when set to true use the overflow checked vector expressions private boolean useCheckedVectorExpressions; @@ -141,6 +156,7 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { private void setHiveConfVars(HiveConf hiveConf) { hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); + hiveVectorIfStmtMode = HiveVectorIfStmtMode.getHiveConfValue(hiveConf); this.reuseScratchColumns = HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); this.ocm.setReuseColumns(reuseScratchColumns); @@ -150,6 +166,7 @@ private void setHiveConfVars(HiveConf hiveConf) { 
private void copyHiveConfVars(VectorizationContext vContextEnvironment) { hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode; + hiveVectorIfStmtMode = vContextEnvironment.hiveVectorIfStmtMode; this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns; this.ocm.setReuseColumns(reuseScratchColumns); } @@ -765,7 +782,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress // and that would require converting their data types to evaluate the udf. // For example decimal column added to an integer column would require integer column to be // cast to decimal. - // Note: this is a no-op for custom UDFs + // Note: this is a no-op for custom UDFs List childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(), exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), @@ -1978,6 +1995,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); + } else if (udf instanceof GenericUDFIf) { + ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType); } else if (udf instanceof GenericUDFWhen) { ve = getWhenExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { @@ -2911,38 +2930,56 @@ private VectorExpression getBetweenFilterExpression(List childExpr return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } + private boolean isCondExpr(ExprNodeDesc exprNodeDesc) { + if (exprNodeDesc instanceof ExprNodeConstantDesc || + exprNodeDesc instanceof ExprNodeColumnDesc) { + return false; + } + return true; // Requires conditional evaluation for good performance. 
+ } + private boolean isNullConst(ExprNodeDesc exprNodeDesc) { //null constant could be typed so we need to check the value if (exprNodeDesc instanceof ExprNodeConstantDesc && ((ExprNodeConstantDesc) exprNodeDesc).getValue() == null) { - return true; + return true; } return false; } - private VectorExpression getWhenExpression(List childExpr, + private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { return null; } - final int size = childExpr.size(); - final ExprNodeDesc whenDesc = childExpr.get(0); - final ExprNodeDesc thenDesc = childExpr.get(1); - final ExprNodeDesc elseDesc; + // Add HiveConf variable with 3 modes: + // 1) adaptor: Always use VectorUDFAdaptor for IF statements. + // + // 2) good: Vectorize but don't optimize conditional expressions + // + // 3) better: Vectorize and Optimize conditional expressions. + // - if (size == 2) { - elseDesc = new ExprNodeConstantDesc(returnType, null); - } else if (size == 3) { - elseDesc = childExpr.get(2); - } else { - final GenericUDFWhen udfWhen = new GenericUDFWhen(); - elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(), - childExpr.subList(2, childExpr.size())); + if (hiveVectorIfStmtMode == HiveVectorIfStmtMode.ADAPTOR) { + return null; } - if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + // Align the THEN/ELSE types. + childExpr = + getChildExpressionsWithImplicitCast( + genericUDFIf, + childExpr, + returnType); + + final ExprNodeDesc ifDesc = childExpr.get(0); + final ExprNodeDesc thenDesc = childExpr.get(1); + final ExprNodeDesc elseDesc = childExpr.get(2); + + final boolean isThenNullConst = isNullConst(thenDesc); + final boolean isElseNullConst = isNullConst(elseDesc); + if (isThenNullConst && isElseNullConst) { // THEN NULL ELSE NULL: An unusual "case", but possible. 
final int outputColumnNum = ocm.allocateOutputColumn(returnType); @@ -2956,17 +2993,32 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } - if (isNullConst(thenDesc)) { - final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); + + final boolean isThenCondExpr = isCondExpr(thenDesc); + final boolean isElseCondExpr = isCondExpr(elseDesc); + + final boolean isOnlyGood = (hiveVectorIfStmtMode == HiveVectorIfStmtMode.GOOD); + + if (isThenNullConst) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); final int outputColumnNum = ocm.allocateOutputColumn(returnType); - final VectorExpression resultExpr = - new IfExprNullColumn( - whenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); + final VectorExpression resultExpr; + if (!isElseCondExpr || isOnlyGood) { + resultExpr = + new IfExprNullColumn( + whenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprNullCondExpr( + whenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, elseExpr}); @@ -2984,17 +3036,27 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } - if (isNullConst(elseDesc)) { - final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); + + if (isElseNullConst) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); final int outputColumnNum = ocm.allocateOutputColumn(returnType); - final VectorExpression resultExpr = - new IfExprColumnNull( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - outputColumnNum); + final VectorExpression resultExpr; + if (!isThenCondExpr || isOnlyGood) { + resultExpr = + new IfExprColumnNull( + 
whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprCondExprNull( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + outputColumnNum); + } resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr}); @@ -3012,11 +3074,91 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } + + if ((isThenCondExpr || isElseCondExpr) && !isOnlyGood) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); + final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); + final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); + + // Only proceed if the THEN/ELSE types were aligned. + if (thenExpr.getOutputColumnVectorType() == elseExpr.getOutputColumnVectorType()) { + + final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr; + if (isThenCondExpr && isElseCondExpr) { + resultExpr = + new IfExprCondExprCondExpr( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else if (isThenCondExpr) { + resultExpr = + new IfExprCondExprColumn( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprColumnCondExpr( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } + + resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr, elseExpr}); + + resultExpr.setInputTypeInfos( + whenExpr.getOutputTypeInfo(), + thenExpr.getOutputTypeInfo(), + elseExpr.getOutputTypeInfo()); + resultExpr.setInputDataTypePhysicalVariations( + whenExpr.getOutputDataTypePhysicalVariation(), + thenExpr.getOutputDataTypePhysicalVariation(), + elseExpr.getOutputDataTypePhysicalVariation()); + + resultExpr.setOutputTypeInfo(returnType); + 
resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } + } + + Class udfClass = genericUDFIf.getClass(); + return getVectorExpressionForUdf( + genericUDFIf, udfClass, childExpr, mode, returnType); + } + + private VectorExpression getWhenExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + + if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { + return null; + } + final int size = childExpr.size(); + + final ExprNodeDesc whenDesc = childExpr.get(0); + final ExprNodeDesc thenDesc = childExpr.get(1); + final ExprNodeDesc elseDesc; + + if (size == 2) { + elseDesc = new ExprNodeConstantDesc(returnType, null); + } else if (size == 3) { + elseDesc = childExpr.get(2); + } else { + final GenericUDFWhen udfWhen = new GenericUDFWhen(); + elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(), + childExpr.subList(2, childExpr.size())); + } + + // Transform CASE WHEN with just a THEN/ELSE into an IF statement. final GenericUDFIf genericUDFIf = new GenericUDFIf(); - final List ifChildExpr = Arrays.asList(whenDesc, thenDesc, elseDesc); - final ExprNodeGenericFuncDesc exprNodeDesc = - new ExprNodeGenericFuncDesc(returnType, genericUDFIf, "if", ifChildExpr); - return getVectorExpression(exprNodeDesc, mode); + final List ifChildExpr = + Arrays.asList(whenDesc, thenDesc, elseDesc); + return getIfExpression(genericUDFIf, ifChildExpr, mode, returnType); } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java new file mode 100644 index 0000000..94e5190 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do regular execution of the THEN vector expression (a column or scalar) and conditional execution + * of the ELSE vector expression of a SQL IF statement. + */ +public class IfExprColumnCondExpr extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprColumnCondExpr(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprColumnCondExpr() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. 
+ * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results. + childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + // The THEN expression is either IdentityExpression (a column) or a ConstantVectorExpression + // (a scalar) and trivial to evaluate.
+ childExpressions[1].evaluate(batch); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java new file mode 100644 index 0000000..abc1343 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Base class that supports conditional execution of the THEN/ELSE vector expressions of + * a SQL IF statement. + */ +public abstract class IfExprCondExprBase extends VectorExpression { + private static final long serialVersionUID = 1L; + + protected final int arg1Column; + + // Whether the IF statement boolean expression was repeating. + protected transient boolean isIfStatementResultRepeated; + protected transient boolean isIfStatementResultThen; + + // The batchIndex for the rows that are for the THEN/ELSE rows respectively. + // Temporary work arrays. + protected transient int thenSelectedCount; + protected transient int[] thenSelected; + protected transient int elseSelectedCount; + protected transient int[] elseSelected; + + public IfExprCondExprBase(int arg1Column, int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + } + + public IfExprCondExprBase() { + super(); + + // Dummy final assignments. + arg1Column = -1; + } + + public void conditionalEvaluate(VectorizedRowBatch batch, VectorExpression condVecExpr, + int[] condSelected, int condSize) { + + int saveSize = batch.size; + boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = batch.selected; + + batch.size = condSize; + batch.selectedInUse = true; + batch.selected = condSelected; + + condVecExpr.evaluate(batch); + + batch.size = saveSize; + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + // NOTE: We do conditional vector expression so we do not call super.evaluateChildren(batch). 
+ + thenSelectedCount = 0; + elseSelectedCount = 0; + isIfStatementResultRepeated = false; + isIfStatementResultThen = false; // Give it a value. + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + // Child #1 is the IF boolean expression. + childExpressions[0].evaluate(batch); + LongColumnVector ifExprColVector = (LongColumnVector) batch.cols[arg1Column]; + if (ifExprColVector.isRepeating) { + isIfStatementResultRepeated = true; + isIfStatementResultThen = + ((ifExprColVector.noNulls || !ifExprColVector.isNull[0]) && + ifExprColVector.vector[0] == 1); + return; + } + + if (thenSelected == null || n > thenSelected.length) { + + // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE). + final int roundUpSize = + ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE) + * VectorizedRowBatch.DEFAULT_SIZE; + thenSelected = new int[roundUpSize]; + elseSelected = new int[roundUpSize]; + } + + int[] sel = batch.selected; + long[] vector = ifExprColVector.vector; + + if (ifExprColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + if (vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } + } else { + boolean[] isNull = ifExprColVector.isNull; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + if (!isNull[i] && vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } else { + for (int i = 0; i < n; i++) { + if (!isNull[i] && vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } + } + + if (thenSelectedCount == 0) { + isIfStatementResultRepeated = true; + 
+ isIfStatementResultThen = false; + } else if (elseSelectedCount == 0) { + isIfStatementResultRepeated = true; + isIfStatementResultThen = true; + } + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // Descriptor is not defined because it takes variable number of arguments with different + // data types. + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java new file mode 100644 index 0000000..cc465c1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the THEN vector expression and regular execution of the ELSE + * vector expression (a column or scalar) of a SQL IF statement.
+ */ +public class IfExprCondExprColumn extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprCondExprColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprCondExprColumn() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. + * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results.
+ childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + // The ELSE expression is either IdentityExpression (a column) or a ConstantVectorExpression + // (a scalar) and trivial to evaluate. + childExpressions[2].evaluate(batch); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java new file mode 100644 index 0000000..7874d5c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the THEN/ELSE vector expressions of a SQL IF statement. + */ +public class IfExprCondExprCondExpr extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprCondExprCondExpr(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprCondExprCondExpr() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. + * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results. + childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java new file mode 100644 index 0000000..b2bf0e4 --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Vectorized SQL IF statement whose THEN argument is a conditionally executed vector expression
+ * and whose ELSE argument is NULL.
+ */
+public class IfExprCondExprNull extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  /** Batch column the THEN values are read from. */
+  protected final int arg2Column;
+
+  /**
+   * @param arg1Column batch column of the IF condition, passed to the base class
+   * @param arg2Column batch column the THEN values are read from
+   * @param outputColumnNum batch column that receives the result
+   */
+  public IfExprCondExprNull(int arg1Column, int arg2Column, int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg2Column = arg2Column;
+  }
+
+  /** No-argument constructor; the final column field gets the dummy value -1. */
+  public IfExprCondExprNull() {
+    super();
+    arg2Column = -1;
+  }
+
+  /**
+   * Writes the THEN column's value for each row in thenSelected and NULL for each row in
+   * elseSelected; a repeated condition result is handled for the whole batch at once.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    final int rowCount = batch.size;
+    if (rowCount <= 0) {
+      return;
+    }
+
+    // The base class does the common analysis of the IF statement boolean expression and
+    // leaves the outcome in isIfStatementResultRepeated, isIfStatementResultThen,
+    // thenSelectedCount, thenSelected, elseSelectedCount and elseSelected.
+    super.evaluate(batch);
+
+    final ColumnVector result = batch.cols[outputColumnNum];
+    final boolean[] resultIsNull = result.isNull;
+
+    // A column reset is unnecessary because the output is changed carefully below.
+    result.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    //           work on BytesColumnVector output columns???
+    result.init();
+
+    final ColumnVector thenVector = batch.cols[arg2Column];
+    final int thenTotal = thenSelectedCount;
+    final int elseTotal = elseSelectedCount;
+
+    if (isIfStatementResultRepeated && isIfStatementResultThen) {
+      // Repeated THEN result: evaluate the THEN expression (only) and copy all its results.
+      childExpressions[1].evaluate(batch);
+      thenVector.copySelected(batch.selectedInUse, batch.selected, rowCount, result);
+      return;
+    }
+    if (isIfStatementResultRepeated) {
+      // Repeated ELSE result: the output becomes a repeating NULL.
+      resultIsNull[0] = true;
+      result.noNulls = false;
+      result.isRepeating = true;
+      return;
+    }
+
+    // NOTE: copySelected cannot be used from here on since it is a whole column operation.
+    conditionalEvaluate(batch, childExpressions[1], thenSelected, thenTotal);
+    for (int t = 0; t < thenTotal; t++) {
+      final int row = thenSelected[t];
+      resultIsNull[row] = false;
+      result.setElement(row, row, thenVector);
+    }
+
+    // Each row in elseSelected is marked NULL.
+    result.noNulls = false;
+    for (int e = 0; e < elseTotal; e++) {
+      result.isNull[elseSelected[e]] = true;
+    }
+  }
+
+  /** Returns the condition and THEN column parameters followed by the literal "null". */
+  @Override
+  public String vectorExpressionParameters() {
+    final String condition = getColumnParamString(0, arg1Column);
+    final String thenParam = getColumnParamString(1, arg2Column);
+    return condition + ", " + thenParam + ", null";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java
new file mode 100644
index 0000000..2ca3388
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Vectorized SQL IF statement whose THEN argument is NULL and whose ELSE argument is a
+ * conditionally executed vector expression.
+ */
+public class IfExprNullCondExpr extends IfExprCondExprBase {
+  private static final long serialVersionUID = 1L;
+
+  /** Batch column the ELSE values are read from. */
+  protected final int arg3Column;
+
+  /**
+   * @param arg1Column batch column of the IF condition, passed to the base class
+   * @param arg3Column batch column the ELSE values are read from
+   * @param outputColumnNum batch column that receives the result
+   */
+  public IfExprNullCondExpr(int arg1Column, int arg3Column, int outputColumnNum) {
+    super(arg1Column, outputColumnNum);
+    this.arg3Column = arg3Column;
+  }
+
+  /** No-argument constructor; the final column field gets the dummy value -1. */
+  public IfExprNullCondExpr() {
+    super();
+    arg3Column = -1;
+  }
+
+  /**
+   * Writes NULL for each row in thenSelected and the ELSE column's value for each row in
+   * elseSelected; a repeated condition result is handled for the whole batch at once.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    final int rowCount = batch.size;
+    if (rowCount <= 0) {
+      return;
+    }
+
+    // The base class does the common analysis of the IF statement boolean expression and
+    // leaves the outcome in isIfStatementResultRepeated, isIfStatementResultThen,
+    // thenSelectedCount, thenSelected, elseSelectedCount and elseSelected.
+    super.evaluate(batch);
+
+    final ColumnVector result = batch.cols[outputColumnNum];
+    final boolean[] resultIsNull = result.isNull;
+
+    // A column reset is unnecessary because the output is changed carefully below.
+    result.isRepeating = false;
+
+    // CONSIDER: Should we do this for all vector expressions that can
+    //           work on BytesColumnVector output columns???
+    result.init();
+
+    final ColumnVector elseVector = batch.cols[arg3Column];
+    final int thenTotal = thenSelectedCount;
+    final int elseTotal = elseSelectedCount;
+
+    if (isIfStatementResultRepeated && isIfStatementResultThen) {
+      // Repeated THEN result: the output becomes a repeating NULL.
+      resultIsNull[0] = true;
+      result.noNulls = false;
+      result.isRepeating = true;
+      return;
+    }
+    if (isIfStatementResultRepeated) {
+      // Repeated ELSE result: evaluate the ELSE expression (only) and copy all its results.
+      // ELSE is the second input parameter (child index 1) but the 3rd column.
+      childExpressions[1].evaluate(batch);
+      elseVector.copySelected(batch.selectedInUse, batch.selected, rowCount, result);
+      return;
+    }
+
+    // NOTE: copySelected cannot be used from here on since it is a whole column operation.
+    result.noNulls = false;
+    for (int t = 0; t < thenTotal; t++) {
+      result.isNull[thenSelected[t]] = true;
+    }
+
+    // ELSE is the second input parameter (child index 1) but the 3rd column.
+    conditionalEvaluate(batch, childExpressions[1], elseSelected, elseTotal);
+    for (int e = 0; e < elseTotal; e++) {
+      final int row = elseSelected[e];
+      resultIsNull[row] = false;
+      result.setElement(row, row, elseVector);
+    }
+  }
+
+  /** Returns the condition parameter, the literal "null", then the ELSE column parameter. */
+  @Override
+  public String vectorExpressionParameters() {
+    // ELSE is the second input parameter but is reported as the 3rd column (index 2).
+    final String condition = getColumnParamString(0, arg1Column);
+    final String elseParam = getColumnParamString(2, arg3Column);
+    return condition + ", null, " + elseParam;
+  }
+}
diff --git ql/src/test/queries/clientpositive/vector_case_when_1.q ql/src/test/queries/clientpositive/vector_case_when_1.q
new file mode 100644
index 0000000..f081efb
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_case_when_1.q
@@ -0,0 +1,281 @@
+set hive.cli.print.header=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+CREATE TABLE lineitem_test_txt (L_ORDERKEY INT,
+                                L_PARTKEY INT,
+                                L_SUPPKEY INT,
+                                L_LINENUMBER INT,
+                                L_QUANTITY INT,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT DOUBLE,
+                                L_TAX DECIMAL(10,2),
+                                L_RETURNFLAG CHAR(1),
+                                L_LINESTATUS CHAR(1),
+                                l_shipdate DATE,
+                                L_COMMITDATE DATE,
+                                L_RECEIPTDATE DATE,
+                                L_SHIPINSTRUCT VARCHAR(20),
+                                L_SHIPMODE CHAR(10),
+                                L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt;
+CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt;
+INSERT INTO TABLE
lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); + +SET hive.vectorized.if.expr.mode=adaptor; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + +SET hive.vectorized.if.expr.mode=good; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + +SET hive.vectorized.if.expr.mode=better; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q index 2eb0a0a..565edee 100644 --- ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q +++ ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q @@ -1,27 +1,209 @@ +set hive.cli.print.header=true; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; set hive.stats.column.autogather=false; -create table student_2_lines( +-- SORT_QUERY_RESULTS + +create table student_10_lines_txt( name string, age int, gpa double) row format delimited fields terminated by '\001' stored as textfile; -LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines; -analyze table student_2_lines compute statistics; +LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt; +CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt; +INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL); +INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8); +analyze table student_10_lines compute statistics; + +------------------------------------------------------------------------------------------ + +SET hive.vectorized.if.expr.mode=adaptor; + 
+create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_adaptor; + +SET hive.vectorized.if.expr.mode=good; + +create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_good; + +SET hive.vectorized.if.expr.mode=better; + +create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table 
insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_better; + +------------------------------------------------------------------------------------------ + +SET hive.vectorized.if.expr.mode=adaptor; + +create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from insert_b_adaptor; + +SET hive.vectorized.if.expr.mode=good; + +create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, 
NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from insert_b_good; + +SET hive.vectorized.if.expr.mode=better; -create table insert_10_1 (a float, b int, c timestamp, d binary); +create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; \ No newline at end of file +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from 
insert_b_better; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out new file mode 100644 index 0000000..a838e3b --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -0,0 +1,1204 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: 
Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), 
comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice 
EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE 
WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: 
decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 
5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 
00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 
20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 
29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 
0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 
48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and 
requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select 
Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, 
col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: 
StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScalar(col 45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, 
VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN 
"Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 
0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 
0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN 
((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 
18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, 
col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), 
VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:boolean, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 
73, 76, 79, 80] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: 
+ enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 
0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 
0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index fd9dacb..3ee55d2 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -764,8 +764,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 8] - selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string + projectedOutputColumnNums: [0, 1, 2, 13] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: 
LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -924,8 +924,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 8] - selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string + projectedOutputColumnNums: [0, 1, 2, 13] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 
10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out index a752dfa..2d23730 100644 --- ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: create table student_2_lines( +PREHOOK: query: create table student_10_lines_txt( name string, age int, gpa double) @@ -7,8 +7,8 @@ fields terminated by '\001' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: create table student_2_lines( +PREHOOK: Output: default@student_10_lines_txt +POSTHOOK: query: create table student_10_lines_txt( name string, age int, gpa double) @@ -17,45 +17,431 @@ fields terminated by '\001' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +POSTHOOK: Output: default@student_10_lines_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt PREHOOK: type: LOAD #### A masked pattern was here #### -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +PREHOOK: Output: default@student_10_lines_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: analyze table student_2_lines compute statistics +POSTHOOK: Output: default@student_10_lines_txt +PREHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@student_10_lines_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@student_10_lines_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_10_lines +POSTHOOK: Lineage: student_10_lines.age SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: student_10_lines.gpa SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: student_10_lines.name SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:name, type:string, comment:null), ] +student_10_lines_txt.name student_10_lines_txt.age student_10_lines_txt.gpa +PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@student_10_lines +POSTHOOK: Lineage: student_10_lines.age EXPRESSION [] +POSTHOOK: Lineage: student_10_lines.gpa EXPRESSION [] +POSTHOOK: Lineage: student_10_lines.name EXPRESSION [] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8) +PREHOOK: type: QUERY 
+PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@student_10_lines +POSTHOOK: Lineage: student_10_lines.age SCRIPT [] +POSTHOOK: Lineage: student_10_lines.gpa SCRIPT [] +POSTHOOK: Lineage: student_10_lines.name SCRIPT [] +_col0 _col1 _col2 +PREHOOK: query: analyze table student_10_lines compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: analyze table student_10_lines compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@student_10_lines +student_10_lines.name student_10_lines.age student_10_lines.gpa +PREHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_a_adaptor +POSTHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_a_adaptor +PREHOOK: query: explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + 
IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0 * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_adaptor + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + + Stage: 
Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_adaptor + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_a_adaptor +POSTHOOK: query: insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_adaptor +POSTHOOK: Lineage: insert_a_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] 
+POSTHOOK: Lineage: insert_a_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_adaptor +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_a_adaptor +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_a_adaptor +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_a_adaptor +#### A masked pattern was here #### +insert_a_adaptor.name insert_a_adaptor.age insert_a_adaptor.gpa insert_a_adaptor.a insert_a_adaptor.b insert_a_adaptor.c insert_a_adaptor.d insert_a_adaptor.e insert_a_adaptor.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table 
insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_a_good +POSTHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_a_good +PREHOOK: query: explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), 
if((age < 40), age, null) (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0 * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprColumnNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprColumnNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprColumnNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data 
size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_good + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_good + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_a_good +POSTHOOK: query: insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, 
name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_good +POSTHOOK: Lineage: insert_a_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_good PREHOOK: type: QUERY -PREHOOK: Input: default@student_2_lines -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: analyze table student_2_lines compute statistics +PREHOOK: Input: default@insert_a_good +#### A masked pattern was here #### +POSTHOOK: query: select * 
from insert_a_good POSTHOOK: type: QUERY -POSTHOOK: Input: default@student_2_lines -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +POSTHOOK: Input: default@insert_a_good +#### A masked pattern was here #### +insert_a_good.name insert_a_good.age insert_a_good.gpa insert_a_good.a insert_a_good.b insert_a_good.c insert_a_good.d insert_a_good.e insert_a_good.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@insert_10_1 -POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +PREHOOK: Output: default@insert_a_better +POSTHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@insert_10_1 +POSTHOOK: Output: default@insert_a_better PREHOOK: query: explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - 
IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -74,40 +460,39 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: student_2_lines - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] Select Operator - expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), 
if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0 * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [2, 1, 5, 8] - selectExpressions: VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprCondExprNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprCondExprNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprCondExprNull(col 18:boolean, col 19:double, 
null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_10_1 + name: default.insert_a_better Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true @@ -116,7 +501,7 @@ STAGE PLANS: includeColumns: [0, 1, 2] dataColumns: name:string, age:int, gpa:double partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, timestamp, bigint, string, string] + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] Stage: Stage-2 Dependency Collection @@ -129,29 +514,578 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_10_1 + name: default.insert_a_better Stage: Stage-3 Stats Work Basic Stats Work: -PREHOOK: query: insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines -PREHOOK: type: QUERY -PREHOOK: Input: default@student_2_lines -PREHOOK: Output: default@insert_10_1 -POSTHOOK: query: insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines -POSTHOOK: type: QUERY -POSTHOOK: Input: default@student_2_lines -POSTHOOK: Output: default@insert_10_1 -POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ] -POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] -POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] -POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_a_better +POSTHOOK: query: insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as 
timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_better +POSTHOOK: Lineage: insert_a_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_better +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_a_better +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_a_better +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_a_better 
+#### A masked pattern was here #### +insert_a_better.name insert_a_better.age insert_a_better.gpa insert_a_better.a insert_a_better.b insert_a_better.c insert_a_better.d insert_a_better.e insert_a_better.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_adaptor +POSTHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_adaptor +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, 
cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, 2011-01-01 01:01:01.0) (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0 * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_adaptor + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type 
category VOID + vectorized: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_adaptor + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_b_adaptor +POSTHOOK: query: insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_adaptor +POSTHOOK: Lineage: insert_b_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.d EXPRESSION 
[(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_b_adaptor +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_adaptor +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_b_adaptor +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_adaptor +#### A masked pattern was here #### +insert_b_adaptor.name insert_b_adaptor.age insert_b_adaptor.gpa insert_b_adaptor.a insert_b_adaptor.b insert_b_adaptor.c insert_b_adaptor.d insert_b_adaptor.e insert_b_adaptor.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 
35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL +PREHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_good +POSTHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_good +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, 2011-01-01 01:01:01.0) (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0 * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullColumn(col 12:boolean, null, col 13)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullColumn(col 9:boolean, null, col 15)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullColumn(col 18:boolean, null, col 19)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_good + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_good + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_b_good +POSTHOOK: query: insert overwrite table insert_b_good 
+ select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_good +POSTHOOK: Lineage: insert_b_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_b_good +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_good +#### A masked pattern was here #### +POSTHOOK: query: select * from 
insert_b_good +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_good +#### A masked pattern was here #### +insert_b_good.name insert_b_good.age insert_b_good.gpa insert_b_good.a insert_b_good.b insert_b_good.c insert_b_good.d insert_b_good.e insert_b_good.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL +PREHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_better +POSTHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_better +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain 
vectorization detail +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, 2011-01-01 01:01:01.0) (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0 * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 
8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullCondExpr(col 12:boolean, null, col 13:binary)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullCondExpr(col 9:boolean, null, col 15:int)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullCondExpr(col 18:boolean, null, col 19:double)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_better + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, 
string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_better + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_b_better +POSTHOOK: query: insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_better +POSTHOOK: Lineage: insert_b_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.d EXPRESSION 
[(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_b_better +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_better +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_b_better +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_better +#### A masked pattern was here #### +insert_b_better.name insert_b_better.age insert_b_better.gpa insert_b_better.a insert_b_better.b insert_b_better.c insert_b_better.d insert_b_better.e insert_b_better.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 35 2.74 NULL 
2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 8d3f163..de30ca7 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -51,13 +51,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 6] - selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int + projectedOutputColumnNums: [0, 7] + selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:int) -> bigint + aggregators: VectorUDAFCount(col 7:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index 49c3036..ff62505 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -1602,7 +1602,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, 
pattern %b%), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongScalarEqualLongColumn(val 3569, col 0:int)(children: col 0:tinyint))) + predicateExpression: FilterExprOrExpr( + children: + FilterStringColLikeStringScalar(col 7:string, pattern %b%), + FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13:decimal(13,3)) + (children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), + FilterDoubleColLessDoubleColumn(col 14:double, col 5:double) + (children: CastLongToDouble(col 3:bigint) -> 14:double), + FilterExprAndExpr( + children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongScalarEqualLongColumn(val 3569, col 0:int)(children: col 0:tinyint))) predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint))) or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index ed17e5c..79cd710 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 
13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -97,7 +97,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -217,8 +217,8 @@ STAGE PLANS: Select 
Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string + projectedOutputColumnNums: [1, 18, 24] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -246,7 +246,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, 
ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -594,7 +594,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -674,8 +674,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ 
-703,7 +703,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -755,8 +755,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0)col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -784,7 +784,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -855,7 +855,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 
1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -936,7 +936,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1017,7 +1017,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 8fff1ed..581518b 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 
13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -96,7 +96,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -216,8 +216,8 @@ STAGE PLANS: Select 
Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string + projectedOutputColumnNums: [1, 18, 24] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -244,7 +244,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, 
ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -588,7 +588,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -667,8 +667,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -695,7 
+695,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -747,8 +747,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0)col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -775,7 +775,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -846,7 +846,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 
4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -926,7 +926,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1006,7 +1006,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/vector_case_when_1.q.out ql/src/test/results/clientpositive/vector_case_when_1.q.out new file mode 100644 index 0000000..3c7363b --- /dev/null +++ ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -0,0 +1,1126 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS 
CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE 
[(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE 
[(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: 
lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE 
(0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + 
Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + 
ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS 
Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 
00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 
0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 
2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 
00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 
80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE 
NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", 
DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 
13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 
23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 
33:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScalar(col 45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 101 Data size: 78920 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 
(type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 
5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 
2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 
0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 
0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 
0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 
0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN 
((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0 - l_discount))) ELSE (0.0) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), 2009-01-01, 2009-12-31) (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 
80] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, 
val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: 
StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:boolean, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + 
Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + 
ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS 
Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 
00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 
0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 
2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 
+29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 
00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 
80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index d7cc0b7..13fb6d1 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -45,13 +45,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 6] - selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 
3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int + projectedOutputColumnNums: [0, 7] + selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:int) -> bigint + aggregators: VectorUDAFCount(col 7:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index b89fa51..e8ad2ca 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -65,8 +65,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, 
ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -93,7 +93,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -210,8 +210,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 
16:string) -> 19:string) -> 20:string + projectedOutputColumnNums: [1, 18, 24] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -238,7 +238,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -525,7 +525,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 
3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -601,8 +601,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -629,7 +629,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -678,8 +678,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0)col 5:decimal(1,0))(children: 
VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -706,7 +706,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -774,7 +774,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -851,7 +851,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false @@ -928,7 +928,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false