diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1e322b8..edea129 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2978,7 +2978,12 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Which vectorized input format support features are enabled for vectorization.\n" + "That is, if a VectorizedInputFormat input format does support \"decimal_64\" for example\n" + "this variable must enable that to be used in vectorization"), - + HIVE_VECTORIZED_IF_EXPR_MODE("hive.vectorized.if.expr.mode", "better", new StringSet("adaptor", "good", "better"), + "Specifies the extent to which SQL IF statements will be vectorized.\n" + + "0. adaptor: only use the VectorUDFAdaptor to vectorize IF statements\n" + + "1. good : use regular vectorized IF expression classes that get good performance\n" + + "2. better : use vectorized IF expression classes that conditionally execute THEN/ELSE\n" + + " expressions for better performance.\n"), HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE("hive.test.vectorized.execution.enabled.override", "none", new StringSet("none", "enable", "disable"), "internal use only, used to override the hive.vectorized.execution.enabled setting and\n" + diff --git data/files/student_10_lines data/files/student_10_lines new file mode 100644 index 0000000..2f1b331 --- /dev/null +++ data/files/student_10_lines @@ -0,0 +1,10 @@ +tom thompson420.53 +luke king280.47 +priscilla falkner551.16 +luke brown601.14 +ulysses garcia352.74 +calvin brown282.70 +oscar thompson352.98 +xavier garcia331.06 +nick johnson34 +quinn ovid19 diff --git data/files/student_2_lines data/files/student_2_lines deleted file mode 100644 index 9e86836..0000000 --- data/files/student_2_lines +++ /dev/null @@ -1,2 +0,0 @@ -tom thompson420.53 -luke king280.47 diff --git 
itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 544c836..9d80920 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -314,6 +314,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_binary_join_groupby.q,\ vector_bround.q,\ vector_bucket.q,\ + vector_case_when_1.q,\ + vector_case_when_2.q,\ vector_cast_constant.q,\ vector_char_2.q,\ vector_char_4.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 4df6e97..93212ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -133,6 +133,21 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { } private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + + public enum HiveVectorIfStmtMode { + ADAPTOR, + GOOD, + BETTER; + + public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) { + String string = HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE); + return valueOf(string.toUpperCase()); + } + } + + private HiveVectorIfStmtMode hiveVectorIfStmtMode; + //when set to true use the overflow checked vector expressions private boolean useCheckedVectorExpressions; @@ -141,6 +156,7 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { private void setHiveConfVars(HiveConf hiveConf) { hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); + hiveVectorIfStmtMode = HiveVectorIfStmtMode.getHiveConfValue(hiveConf); this.reuseScratchColumns = HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); this.ocm.setReuseColumns(reuseScratchColumns); @@ -150,6 +166,7 @@ private void 
setHiveConfVars(HiveConf hiveConf) { private void copyHiveConfVars(VectorizationContext vContextEnvironment) { hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode; + hiveVectorIfStmtMode = vContextEnvironment.hiveVectorIfStmtMode; this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns; this.ocm.setReuseColumns(reuseScratchColumns); } @@ -765,7 +782,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress // and that would require converting their data types to evaluate the udf. // For example decimal column added to an integer column would require integer column to be // cast to decimal. - // Note: this is a no-op for custom UDFs + // Note: this is a no-op for custom UDFs List childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(), exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), @@ -1978,6 +1995,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); + } else if (udf instanceof GenericUDFIf) { + ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType); } else if (udf instanceof GenericUDFWhen) { ve = getWhenExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { @@ -2911,38 +2930,56 @@ private VectorExpression getBetweenFilterExpression(List childExpr return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } + private boolean isCondExpr(ExprNodeDesc exprNodeDesc) { + if (exprNodeDesc instanceof ExprNodeConstantDesc || + exprNodeDesc instanceof ExprNodeColumnDesc) { + return false; + } + return true; // Requires conditional evaluation for good performance. 
+ } + private boolean isNullConst(ExprNodeDesc exprNodeDesc) { //null constant could be typed so we need to check the value if (exprNodeDesc instanceof ExprNodeConstantDesc && ((ExprNodeConstantDesc) exprNodeDesc).getValue() == null) { - return true; + return true; } return false; } - private VectorExpression getWhenExpression(List childExpr, + private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { return null; } - final int size = childExpr.size(); - final ExprNodeDesc whenDesc = childExpr.get(0); - final ExprNodeDesc thenDesc = childExpr.get(1); - final ExprNodeDesc elseDesc; + // Add HiveConf variable with 3 modes: + // 1) adaptor: Always use VectorUDFAdaptor for IF statements. + // + // 2) good: Vectorize but don't optimize conditional expressions + // + // 3) better: Vectorize and Optimize conditional expressions. + // - if (size == 2) { - elseDesc = new ExprNodeConstantDesc(returnType, null); - } else if (size == 3) { - elseDesc = childExpr.get(2); - } else { - final GenericUDFWhen udfWhen = new GenericUDFWhen(); - elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(), - childExpr.subList(2, childExpr.size())); + if (hiveVectorIfStmtMode == HiveVectorIfStmtMode.ADAPTOR) { + return null; } - if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + // Align the THEN/ELSE types. + childExpr = + getChildExpressionsWithImplicitCast( + genericUDFIf, + childExpr, + returnType); + + final ExprNodeDesc ifDesc = childExpr.get(0); + final ExprNodeDesc thenDesc = childExpr.get(1); + final ExprNodeDesc elseDesc = childExpr.get(2); + + final boolean isThenNullConst = isNullConst(thenDesc); + final boolean isElseNullConst = isNullConst(elseDesc); + if (isThenNullConst && isElseNullConst) { // THEN NULL ELSE NULL: An unusual "case", but possible. 
final int outputColumnNum = ocm.allocateOutputColumn(returnType); @@ -2956,17 +2993,32 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } - if (isNullConst(thenDesc)) { - final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); + + final boolean isThenCondExpr = isCondExpr(thenDesc); + final boolean isElseCondExpr = isCondExpr(elseDesc); + + final boolean isOnlyGood = (hiveVectorIfStmtMode == HiveVectorIfStmtMode.GOOD); + + if (isThenNullConst) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); final int outputColumnNum = ocm.allocateOutputColumn(returnType); - final VectorExpression resultExpr = - new IfExprNullColumn( - whenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); + final VectorExpression resultExpr; + if (!isElseCondExpr || isOnlyGood) { + resultExpr = + new IfExprNullColumn( + whenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprNullCondExpr( + whenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, elseExpr}); @@ -2984,17 +3036,27 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } - if (isNullConst(elseDesc)) { - final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); + + if (isElseNullConst) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); final int outputColumnNum = ocm.allocateOutputColumn(returnType); - final VectorExpression resultExpr = - new IfExprColumnNull( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - outputColumnNum); + final VectorExpression resultExpr; + if (!isThenCondExpr || isOnlyGood) { + resultExpr = + new IfExprColumnNull( + 
whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprCondExprNull( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + outputColumnNum); + } resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr}); @@ -3012,11 +3074,91 @@ private VectorExpression getWhenExpression(List childExpr, return resultExpr; } + + if ((isThenCondExpr || isElseCondExpr) && !isOnlyGood) { + final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); + final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); + final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); + + // Only proceed if the THEN/ELSE types were aligned. + if (thenExpr.getOutputColumnVectorType() == elseExpr.getOutputColumnVectorType()) { + + final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr; + if (isThenCondExpr && isElseCondExpr) { + resultExpr = + new IfExprCondExprCondExpr( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else if (isThenCondExpr) { + resultExpr = + new IfExprCondExprColumn( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } else { + resultExpr = + new IfExprColumnCondExpr( + whenExpr.getOutputColumnNum(), + thenExpr.getOutputColumnNum(), + elseExpr.getOutputColumnNum(), + outputColumnNum); + } + + resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr, elseExpr}); + + resultExpr.setInputTypeInfos( + whenExpr.getOutputTypeInfo(), + thenExpr.getOutputTypeInfo(), + elseExpr.getOutputTypeInfo()); + resultExpr.setInputDataTypePhysicalVariations( + whenExpr.getOutputDataTypePhysicalVariation(), + thenExpr.getOutputDataTypePhysicalVariation(), + elseExpr.getOutputDataTypePhysicalVariation()); + + resultExpr.setOutputTypeInfo(returnType); + 
resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } + } + + Class udfClass = genericUDFIf.getClass(); + return getVectorExpressionForUdf( + genericUDFIf, udfClass, childExpr, mode, returnType); + } + + private VectorExpression getWhenExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + + if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { + return null; + } + final int size = childExpr.size(); + + final ExprNodeDesc whenDesc = childExpr.get(0); + final ExprNodeDesc thenDesc = childExpr.get(1); + final ExprNodeDesc elseDesc; + + if (size == 2) { + elseDesc = new ExprNodeConstantDesc(returnType, null); + } else if (size == 3) { + elseDesc = childExpr.get(2); + } else { + final GenericUDFWhen udfWhen = new GenericUDFWhen(); + elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(), + childExpr.subList(2, childExpr.size())); + } + + // Transform CASE WHEN with just a THEN/ELSE into an IF statement. final GenericUDFIf genericUDFIf = new GenericUDFIf(); - final List ifChildExpr = Arrays.asList(whenDesc, thenDesc, elseDesc); - final ExprNodeGenericFuncDesc exprNodeDesc = - new ExprNodeGenericFuncDesc(returnType, genericUDFIf, "if", ifChildExpr); - return getVectorExpression(exprNodeDesc, mode); + final List ifChildExpr = + Arrays.asList(whenDesc, thenDesc, elseDesc); + return getIfExpression(genericUDFIf, ifChildExpr, mode, returnType); } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java new file mode 100644 index 0000000..94e5190 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnCondExpr.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do regular execution of the THEN vector expression (a column or scalar) and conditional execution + * of the ELSE vector expression of a SQL IF statement. + */ +public class IfExprColumnCondExpr extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprColumnCondExpr(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprColumnCondExpr() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. 
+ * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results. + childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + // The THEN expression is either IdentityExpression (a column) or a ConstantVectorExpression + // (a scalar) and trivial to evaluate.
+ childExpressions[1].evaluate(batch); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java new file mode 100644 index 0000000..abc1343 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprBase.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Base class that supports conditional execution of the THEN/ELSE vector expressions of + * a SQL IF statement. + */ +public abstract class IfExprCondExprBase extends VectorExpression { + private static final long serialVersionUID = 1L; + + protected final int arg1Column; + + // Whether the IF statement boolean expression was repeating. + protected transient boolean isIfStatementResultRepeated; + protected transient boolean isIfStatementResultThen; + + // The batchIndex for the rows that are for the THEN/ELSE rows respectively. + // Temporary work arrays. + protected transient int thenSelectedCount; + protected transient int[] thenSelected; + protected transient int elseSelectedCount; + protected transient int[] elseSelected; + + public IfExprCondExprBase(int arg1Column, int outputColumnNum) { + super(outputColumnNum); + this.arg1Column = arg1Column; + } + + public IfExprCondExprBase() { + super(); + + // Dummy final assignments. + arg1Column = -1; + } + + public void conditionalEvaluate(VectorizedRowBatch batch, VectorExpression condVecExpr, + int[] condSelected, int condSize) { + + int saveSize = batch.size; + boolean saveSelectedInUse = batch.selectedInUse; + int[] saveSelected = batch.selected; + + batch.size = condSize; + batch.selectedInUse = true; + batch.selected = condSelected; + + condVecExpr.evaluate(batch); + + batch.size = saveSize; + batch.selectedInUse = saveSelectedInUse; + batch.selected = saveSelected; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + // NOTE: We do conditional vector expression so we do not call super.evaluateChildren(batch). 
+ + thenSelectedCount = 0; + elseSelectedCount = 0; + isIfStatementResultRepeated = false; + isIfStatementResultThen = false; // Give it a value. + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + // Child #1 is the IF boolean expression. + childExpressions[0].evaluate(batch); + LongColumnVector ifExprColVector = (LongColumnVector) batch.cols[arg1Column]; + if (ifExprColVector.isRepeating) { + isIfStatementResultRepeated = true; + isIfStatementResultThen = + ((ifExprColVector.noNulls || !ifExprColVector.isNull[0]) && + ifExprColVector.vector[0] == 1); + return; + } + + if (thenSelected == null || n > thenSelected.length) { + + // (Re)allocate larger to be a multiple of 1024 (DEFAULT_SIZE). + final int roundUpSize = + ((n + VectorizedRowBatch.DEFAULT_SIZE - 1) / VectorizedRowBatch.DEFAULT_SIZE) + * VectorizedRowBatch.DEFAULT_SIZE; + thenSelected = new int[roundUpSize]; + elseSelected = new int[roundUpSize]; + } + + int[] sel = batch.selected; + long[] vector = ifExprColVector.vector; + + if (ifExprColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + if (vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } + } else { + boolean[] isNull = ifExprColVector.isNull; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + final int i = sel[j]; + if (!isNull[i] && vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } else { + for (int i = 0; i < n; i++) { + if (!isNull[i] && vector[i] == 1) { + thenSelected[thenSelectedCount++] = i; + } else { + elseSelected[elseSelectedCount++] = i; + } + } + } + } + + if (thenSelectedCount == 0) { + isIfStatementResultRepeated = true; + 
+ isIfStatementResultThen = false; + } else if (elseSelectedCount == 0) { + isIfStatementResultRepeated = true; + isIfStatementResultThen = true; + } + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // Descriptor is not defined because it takes variable number of arguments with different + // data types. + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java new file mode 100644 index 0000000..cc465c1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprColumn.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the THEN vector expression and regular execution of the ELSE + * vector expression (a column or scalar) of a SQL IF statement.
+ */ +public class IfExprCondExprColumn extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprCondExprColumn(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprCondExprColumn() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. + * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results.
+ childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + // The ELSE expression is either IdentityExpression (a column) or a ConstantVectorExpression + // (a scalar) and trivial to evaluate. + childExpressions[2].evaluate(batch); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java new file mode 100644 index 0000000..7874d5c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprCondExpr.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the THEN/ELSE vector expressions of a SQL IF statement. + */ +public class IfExprCondExprCondExpr extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + protected final int arg3Column; + + public IfExprCondExprCondExpr(int arg1Column, int arg2Column, int arg3Column, + int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + } + + public IfExprCondExprCondExpr() { + super(); + + // Dummy final assignments. + arg2Column = -1; + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do + return; + } + + /* + * Do common analysis of the IF statement boolean expression. + * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { + // Evaluate ELSE expression (only) and copy all its results. + childExpressions[2].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + conditionalEvaluate(batch, childExpressions[2], elseSelected, elseCount); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java new file mode 100644 index 0000000..b2bf0e4 --- /dev/null +++
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCondExprNull.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the THEN vector expression of a SQL IF statement whose ELSE is NULL. + */ +public class IfExprCondExprNull extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg2Column; + + public IfExprCondExprNull(int arg1Column, int arg2Column, int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg2Column = arg2Column; + } + + public IfExprCondExprNull() { + super(); + + // Dummy assignment; the final field must be set in every constructor. + arg2Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do for an empty batch. + return; + } + + /* + * Do common analysis of the IF statement boolean expression.
+ * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector thenColVector = batch.cols[arg2Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { + // Evaluate THEN expression (only) and copy all its results. + childExpressions[1].evaluate(batch); + thenColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } else { // ELSE is NULL: set entry 0 NULL and mark the output as repeating. + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation.
+ + conditionalEvaluate(batch, childExpressions[1], thenSelected, thenCount); + for (int i = 0; i < thenCount; i++) { + final int batchIndex = thenSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, thenColVector); + } + + outputColVector.noNulls = false; + for (int i = 0; i < elseCount; i++) { + outputColVector.isNull[elseSelected[i]] = true; + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + + ", null"; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java new file mode 100644 index 0000000..2ca3388 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullCondExpr.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Do conditional execution of the ELSE vector expression of a SQL IF statement whose THEN is NULL. + */ +public class IfExprNullCondExpr extends IfExprCondExprBase { + private static final long serialVersionUID = 1L; + + protected final int arg3Column; + + public IfExprNullCondExpr(int arg1Column, int arg3Column, int outputColumnNum) { + super(arg1Column, outputColumnNum); + this.arg3Column = arg3Column; + } + + public IfExprNullCondExpr() { + super(); + + // Dummy assignment; the final field must be set in every constructor. + arg3Column = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + int n = batch.size; + if (n <= 0) { + // Nothing to do for an empty batch. + return; + } + + /* + * Do common analysis of the IF statement boolean expression. + * + * The following protected members can be examined afterwards: + * + * boolean isIfStatementResultRepeated + * boolean isIfStatementResultThen + * + * int thenSelectedCount + * int[] thenSelected + * int elseSelectedCount + * int[] elseSelected + */ + super.evaluate(batch); + + ColumnVector outputColVector = batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // CONSIDER: Should we do this for all vector expressions that can + // work on BytesColumnVector output columns??? + outputColVector.init(); + + ColumnVector elseColVector = batch.cols[arg3Column]; + + final int thenCount = thenSelectedCount; + final int elseCount = elseSelectedCount; + + if (isIfStatementResultRepeated) { + if (isIfStatementResultThen) { // THEN is NULL: set entry 0 NULL and mark the output as repeating. + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + } else { + // Evaluate ELSE expression (only) and copy all its results.
+ // Second input parameter but 3rd column. + childExpressions[1].evaluate(batch); + elseColVector.copySelected(batch.selectedInUse, batch.selected, n, outputColVector); + } + return; + } + + // NOTE: We cannot use copySelected below since it is a whole column operation. + + outputColVector.noNulls = false; + for (int i = 0; i < thenCount; i++) { + outputColVector.isNull[thenSelected[i]] = true; + } + + // Second input parameter but 3rd column. + conditionalEvaluate(batch, childExpressions[1], elseSelected, elseCount); + for (int i = 0; i < elseCount; i++) { + final int batchIndex = elseSelected[i]; + outputIsNull[batchIndex] = false; + outputColVector.setElement(batchIndex, batchIndex, elseColVector); + } + } + + @Override + public String vectorExpressionParameters() { + // Second input parameter but 3rd column. + return getColumnParamString(0, arg1Column) + ", null, " + getColumnParamString(2, arg3Column); + } +} diff --git ql/src/test/queries/clientpositive/vector_case_when_1.q ql/src/test/queries/clientpositive/vector_case_when_1.q new file mode 100644 index 0000000..f081efb --- /dev/null +++ ql/src/test/queries/clientpositive/vector_case_when_1.q @@ -0,0 +1,281 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.enabled=true; + +CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt; +CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt; +INSERT INTO TABLE
lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); + +SET hive.vectorized.if.expr.mode=adaptor; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + +SET hive.vectorized.if.expr.mode=good; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + +SET hive.vectorized.if.expr.mode=better; + +EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity; + \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_case_when_2.q ql/src/test/queries/clientpositive/vector_case_when_2.q new file mode 100644 index 0000000..6854fc0 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_case_when_2.q @@ -0,0 +1,208 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.enabled=true; + +create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt; + +create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc; +insert overwrite table timestamps + select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt; + +INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL); + +SET hive.vectorized.if.expr.mode=adaptor; + +EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN 
"Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN 
"Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; + +SET hive.vectorized.if.expr.mode=good; + +EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 
2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= 
timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; + +SET hive.vectorized.if.expr.mode=better; + +EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp 
'2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 
00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2; + \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q index 2eb0a0a..565edee 100644 --- ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q +++ ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q @@ -1,27 +1,209 @@ +set hive.cli.print.header=true; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; set hive.stats.column.autogather=false; -create table student_2_lines( +-- SORT_QUERY_RESULTS + +create table student_10_lines_txt( name string, age int, gpa double) row format delimited fields terminated 
by '\001' stored as textfile; -LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines; -analyze table student_2_lines compute statistics; +LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt; +CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt; +INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL); +INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8); +analyze table student_10_lines compute statistics; + +------------------------------------------------------------------------------------------ + +SET hive.vectorized.if.expr.mode=adaptor; + +create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_adaptor; + +SET hive.vectorized.if.expr.mode=good; + +create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + 
IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_good; + +SET hive.vectorized.if.expr.mode=better; + +create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines; +select * from insert_a_better; + +------------------------------------------------------------------------------------------ + +SET hive.vectorized.if.expr.mode=adaptor; + +create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from 
student_10_lines; +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from insert_b_adaptor; + +SET hive.vectorized.if.expr.mode=good; + +create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); + +explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from insert_b_good; + +SET hive.vectorized.if.expr.mode=better; -create table insert_10_1 (a float, b int, c timestamp, d binary); +create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double); explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines; \ No newline at end of file +insert 
overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines; +select * from insert_b_better; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out new file mode 100644 index 0000000..bd590e2 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -0,0 +1,1204 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY 
'|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: 
lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test 
VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS 
Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + 
WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_quantity (type: int), CASE 
WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: 
double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 
- l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 
END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 
1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 
1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 
00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 
00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 
1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 
2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - 
l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and 
requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 
16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, 
IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), 
VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScalar(col 45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 
40, 43, 44] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + 
WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 
0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 
0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN 
((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val 
Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 
49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), 
VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:boolean, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 
73, 76, 79, 80] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: 
+ enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 17 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:date, VALUE._col4:double, VALUE._col5:double, VALUE._col6:decimal(10,2), VALUE._col7:decimal(10,2), VALUE._col8:decimal(12,2), VALUE._col9:decimal(12,2), VALUE._col10:decimal(10,2), VALUE._col11:decimal(10,2), VALUE._col12:timestamp, VALUE._col13:int, VALUE._col14:int, VALUE._col15:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 
0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 
0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out new file mode 100644 index 0000000..38187bb --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -0,0 +1,884 @@ +PREHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps_txt +POSTHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@timestamps_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@timestamps_txt +PREHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps +POSTHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps +PREHOOK: query: insert overwrite table timestamps + select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps_txt +PREHOOK: Output: default@timestamps +POSTHOOK: query: insert overwrite table timestamps + select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps_txt +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.cdate EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.ctimestamp1 SIMPLE [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +tsval tsval _c2 _c3 +PREHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@timestamps +POSTHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.cdate EXPRESSION [] +POSTHOOK: Lineage: timestamps.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: timestamps.stimestamp1 
EXPRESSION [] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: 
QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE 
(TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp 
'2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN 
ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 
00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 
08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 +2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 
5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 
23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 
23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) 
(type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, 
IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) 
-> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [10, 12, 13, 14, 11, 7, 16, 23] + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp + partitionColumnCount: 0 + 
scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps 
+#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 
Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 +2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 
03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 
8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 
23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS 
Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) 
THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val 
Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) 
-> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [1, 2, 3] + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [15, 26, 36, 40, 42, 44, 46, 53] + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, 
KEY.reducesinkkey2:timestamp, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:int, VALUE._col4:string, VALUE._col5:int, VALUE._col6:int, VALUE._col7:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + 
WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 
00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 
1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 
20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 +2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 
16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index e57a0da..9f9fdaf 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -764,8 +764,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 8] - selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string + projectedOutputColumnNums: [0, 1, 2, 13] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -924,8 +924,8 @@ STAGE PLANS: Select Vectorization: className: 
VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 8] - selectExpressions: IfExprStringScalarStringGroupColumn(col 3:boolean, val 0col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, IfExprStringScalarStringGroupColumn(col 4:boolean, val 1col 8:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 4:boolean, IfExprStringScalarStringGroupColumn(col 5:boolean, val 2col 7:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 5:boolean, IfExprStringScalarStringScalar(col 6:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 6:boolean) -> 7:string) -> 8:string) -> 7:string) -> 8:string + projectedOutputColumnNums: [0, 1, 2, 13] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:stringcol 12:string)(children: LongColEqualLongScalar(col 2:bigint, val 0) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:string, IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 11:string)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 5:boolean, ConstantVectorExpression(val 1) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 10:string)(children: LongColEqualLongScalar(col 2:bigint, val 2) -> 7:boolean, ConstantVectorExpression(val 2) -> 8:string, IfExprStringScalarStringScalar(col 9:boolean, val 3, val nothing)(children: LongColEqualLongScalar(col 2:bigint, val 3) -> 9:boolean) -> 10:string) -> 11:string) -> 12:string) -> 13:string Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out index 7402667..4e36f37 100644 --- ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: create table student_2_lines( +PREHOOK: query: 
create table student_10_lines_txt( name string, age int, gpa double) @@ -7,8 +7,8 @@ fields terminated by '\001' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: create table student_2_lines( +PREHOOK: Output: default@student_10_lines_txt +POSTHOOK: query: create table student_10_lines_txt( name string, age int, gpa double) @@ -17,45 +17,431 @@ fields terminated by '\001' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +POSTHOOK: Output: default@student_10_lines_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt PREHOOK: type: LOAD #### A masked pattern was here #### -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines +PREHOOK: Output: default@student_10_lines_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_10_lines' OVERWRITE INTO TABLE student_10_lines_txt POSTHOOK: type: LOAD #### A masked pattern was here #### -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: analyze table student_2_lines compute statistics +POSTHOOK: Output: default@student_10_lines_txt +PREHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@student_10_lines_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: CREATE TABLE student_10_lines STORED AS ORC AS SELECT * FROM student_10_lines_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@student_10_lines_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_10_lines +POSTHOOK: 
Lineage: student_10_lines.age SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: student_10_lines.gpa SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: student_10_lines.name SIMPLE [(student_10_lines_txt)student_10_lines_txt.FieldSchema(name:name, type:string, comment:null), ] +student_10_lines_txt.name student_10_lines_txt.age student_10_lines_txt.gpa +PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@student_10_lines +POSTHOOK: Lineage: student_10_lines.age EXPRESSION [] +POSTHOOK: Lineage: student_10_lines.gpa EXPRESSION [] +POSTHOOK: Lineage: student_10_lines.name EXPRESSION [] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: INSERT INTO TABLE student_10_lines VALUES ("George", 22, 3.8) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@student_10_lines +POSTHOOK: Lineage: student_10_lines.age SCRIPT [] +POSTHOOK: Lineage: student_10_lines.gpa SCRIPT [] +POSTHOOK: Lineage: student_10_lines.name SCRIPT [] +_col0 _col1 _col2 +PREHOOK: query: analyze table student_10_lines compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@student_10_lines +POSTHOOK: query: analyze table student_10_lines compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@student_10_lines +student_10_lines.name 
student_10_lines.age student_10_lines.gpa +PREHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_a_adaptor +POSTHOOK: query: create table insert_a_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_a_adaptor +PREHOOK: query: explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), 
if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_adaptor + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_adaptor + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: 
default@insert_a_adaptor +POSTHOOK: query: insert overwrite table insert_a_adaptor + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_adaptor +POSTHOOK: Lineage: insert_a_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_adaptor +PREHOOK: type: QUERY 
+PREHOOK: Input: default@insert_a_adaptor +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_a_adaptor +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_a_adaptor +#### A masked pattern was here #### +insert_a_adaptor.name insert_a_adaptor.age insert_a_adaptor.gpa insert_a_adaptor.a insert_a_adaptor.b insert_a_adaptor.c insert_a_adaptor.d insert_a_adaptor.e insert_a_adaptor.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_a_good +POSTHOOK: query: create table insert_a_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_a_good +PREHOOK: query: explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines 
+PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, 
ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprColumnNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprColumnNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprColumnNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_good + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, 
bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_a_good + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_a_good +POSTHOOK: query: insert overwrite table insert_a_good + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_good +POSTHOOK: Lineage: insert_a_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_good.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: 
insert_a_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_good PREHOOK: type: QUERY -PREHOOK: Input: default@student_2_lines -PREHOOK: Output: default@student_2_lines -POSTHOOK: query: analyze table student_2_lines compute statistics +PREHOOK: Input: default@insert_a_good +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_a_good POSTHOOK: type: QUERY -POSTHOOK: Input: default@student_2_lines -POSTHOOK: Output: default@student_2_lines -PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +POSTHOOK: Input: default@insert_a_good +#### A masked pattern was here #### +insert_a_good.name insert_a_good.age insert_a_good.gpa insert_a_good.a insert_a_good.b insert_a_good.c insert_a_good.d insert_a_good.e insert_a_good.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 
5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@insert_10_1 -POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary) +PREHOOK: Output: default@insert_a_better +POSTHOOK: query: create table insert_a_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@insert_10_1 +POSTHOOK: Output: default@insert_a_better PREHOOK: query: explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines +insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + 
IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -74,40 +460,39 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: student_2_lines - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] Select Operator - expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), age, null) (type: int), if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null) (type: timestamp), if((length(name) > 8), name, null) (type: string), if((length(name) < 8), CAST( name AS BINARY), null) (type: binary), if((age > 40), length(name), null) (type: int), if((length(name) > 10), (2.0D * gpa), null) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [2, 1, 5, 8] - selectExpressions: VectorUDFAdaptor(if((age > 40), TIMESTAMP'2011-01-01 01:01:01.0', null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 
7:binary) -> 8:binary - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprColumnNull(col 4:boolean, col 1:int, null)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprColumnNull(col 6:boolean, col 7:timestamp, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprColumnNull(col 10:boolean, col 0:string, null)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprCondExprNull(col 12:boolean, col 13:binary, null)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprCondExprNull(col 9:boolean, col 15:int, null)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprCondExprNull(col 18:boolean, col 19:double, null)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_10_1 + name: 
default.insert_a_better Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs Map Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true @@ -116,7 +501,7 @@ STAGE PLANS: includeColumns: [0, 1, 2] dataColumns: name:string, age:int, gpa:double partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, timestamp, bigint, string, string] + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] Stage: Stage-2 Dependency Collection @@ -129,29 +514,578 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_10_1 + name: default.insert_a_better Stage: Stage-3 Stats Work Basic Stats Work: -PREHOOK: query: insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines -PREHOOK: type: QUERY -PREHOOK: Input: default@student_2_lines -PREHOOK: Output: default@insert_10_1 -POSTHOOK: query: insert overwrite table insert_10_1 - select cast(gpa as float), - age, - IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL), - IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines -POSTHOOK: type: QUERY -POSTHOOK: Input: default@student_2_lines 
-POSTHOOK: Output: default@insert_10_1 -POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ] -POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] -POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ] -POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_a_better +POSTHOOK: query: insert overwrite table insert_a_better + select + name, + age, + gpa, + IF(age<40, age, NULL), + IF(age>40, cast('2011-01-01 01:01:01' as timestamp), NULL), + IF(LENGTH(name)>8, name, NULL), + IF(LENGTH(name)<8, cast(name as binary), NULL), + IF(age>40, LENGTH(name), NULL), + IF(LENGTH(name)> 10, 2 * gpa, NULL) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_a_better +POSTHOOK: Lineage: insert_a_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_a_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, 
comment:null), ] +POSTHOOK: Lineage: insert_a_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_a_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_a_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_a_better +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_a_better +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_a_better +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_a_better +#### A masked pattern was here #### +insert_a_better.name insert_a_better.age insert_a_better.gpa insert_a_better.a insert_a_better.b insert_a_better.c insert_a_better.d insert_a_better.e insert_a_better.f +George 22 3.8 22 NULL NULL George NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +calvin brown 28 2.7 28 NULL calvin brown NULL NULL 5.4 +luke brown 60 1.14 NULL 2011-01-01 01:01:01 luke brown NULL 10 NULL +luke king 28 0.47 28 NULL luke king NULL NULL NULL +nick johnson 34 NULL 34 NULL nick johnson NULL NULL NULL +oscar thompson 35 2.98 35 NULL oscar thompson NULL NULL 5.96 +priscilla falkner 55 1.16 NULL 2011-01-01 01:01:01 priscilla falkner NULL 17 2.32 +quinn ovid 19 NULL 19 NULL quinn ovid NULL NULL NULL +tom thompson 42 0.53 NULL 2011-01-01 01:01:01 tom thompson NULL 12 1.06 +ulysses garcia 35 2.74 35 
NULL ulysses garcia NULL NULL 5.48 +xavier garcia 33 1.06 33 NULL xavier garcia NULL NULL 2.12 +PREHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_adaptor +POSTHOOK: query: create table insert_b_adaptor (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_adaptor +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: 
double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_adaptor + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_adaptor + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: 
default@student_10_lines +PREHOOK: Output: default@insert_b_adaptor +POSTHOOK: query: insert overwrite table insert_b_adaptor + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_adaptor +POSTHOOK: Lineage: insert_b_adaptor.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_adaptor.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * 
from insert_b_adaptor +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_adaptor +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_b_adaptor +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_adaptor +#### A masked pattern was here #### +insert_b_adaptor.name insert_b_adaptor.age insert_b_adaptor.gpa insert_b_adaptor.a insert_b_adaptor.b insert_b_adaptor.c insert_b_adaptor.d insert_b_adaptor.e insert_b_adaptor.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL +PREHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_good +POSTHOOK: query: create table insert_b_good (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_good +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + 
IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 
1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullColumn(col 12:boolean, null, col 13)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullColumn(col 9:boolean, null, col 15)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullColumn(col 18:boolean, null, col 19)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_good + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + 
dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_good + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: Input: default@student_10_lines +PREHOOK: Output: default@insert_b_good +POSTHOOK: query: insert overwrite table insert_b_good + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_good +POSTHOOK: Lineage: insert_b_good.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_good.c EXPRESSION 
[(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_good.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_good.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_good.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from insert_b_good +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_good +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_b_good +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_good +#### A masked pattern was here #### +insert_b_good.name insert_b_good.age insert_b_good.gpa insert_b_good.a insert_b_good.b insert_b_good.c insert_b_good.d insert_b_good.e insert_b_good.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn 
ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL +PREHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_b_better +POSTHOOK: query: create table insert_b_better (name string, age int, gpa double, a int, b timestamp, c string, d binary, e int, f double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_b_better +PREHOOK: query: explain vectorization detail +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: student_10_lines + Statistics: Num rows: 12 Data size: 2352 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct] + Select Operator + expressions: name (type: string), age (type: int), gpa (type: double), if((age < 40), null, age) (type: int), if((age > 40), null, TIMESTAMP'2011-01-01 01:01:01.0') (type: timestamp), if((length(name) > 8), null, name) (type: string), if((length(name) < 8), null, CAST( name AS BINARY)) (type: binary), if((age > 40), null, length(name)) (type: int), if((length(name) > 10), null, (2.0D * gpa)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5, 8, 11, 14, 16, 20] + selectExpressions: IfExprNullColumn(col 4:boolean, null, col 1)(children: LongColLessLongScalar(col 1:int, val 40) -> 4:boolean, col 1:int) -> 5:int, IfExprNullColumn(col 6:boolean, null, col 7)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 6:boolean, ConstantVectorExpression(val 2011-01-01 01:01:01.0) -> 7:timestamp) -> 8:timestamp, IfExprNullColumn(col 10:boolean, null, col 0)(children: LongColGreaterLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 10:boolean, col 0:string) -> 11:string, IfExprNullCondExpr(col 12:boolean, null, col 13:binary)(children: LongColLessLongScalar(col 9:int, val 8)(children: StringLength(col 0:string) -> 9:int) -> 12:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 13:binary) -> 14:binary, IfExprNullCondExpr(col 9:boolean, null, col 15:int)(children: LongColGreaterLongScalar(col 1:int, val 40) -> 9:boolean, StringLength(col 0:string) -> 15:int) -> 16:int, IfExprNullCondExpr(col 18:boolean, null, col 19:double)(children: LongColGreaterLongScalar(col 17:int, val 10)(children: StringLength(col 0:string) -> 17:int) -> 18:boolean, DoubleScalarMultiplyDoubleColumn(val 2.0, col 2:double) -> 
19:double) -> 20:double + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_better + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: name:string, age:int, gpa:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, timestamp, timestamp, bigint, bigint, string, bigint, string, string, bigint, bigint, bigint, bigint, double, double] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.insert_b_better + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +PREHOOK: type: QUERY +PREHOOK: 
Input: default@student_10_lines +PREHOOK: Output: default@insert_b_better +POSTHOOK: query: insert overwrite table insert_b_better + select + name, + age, + gpa, + IF(age<40, NULL, age), + IF(age>40, NULL, cast('2011-01-01 01:01:01' as timestamp)), + IF(LENGTH(name)>8, NULL, name), + IF(LENGTH(name)<8, NULL, cast(name as binary)), + IF(age>40, NULL, LENGTH(name)), + IF(LENGTH(name)> 10, NULL, 2 * gpa) + from student_10_lines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_10_lines +POSTHOOK: Output: default@insert_b_better +POSTHOOK: Lineage: insert_b_better.a EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.age SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.b EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: insert_b_better.c EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.d EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.e EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:age, type:int, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: insert_b_better.f EXPRESSION [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), (student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_better.gpa SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:gpa, type:double, comment:null), ] +POSTHOOK: Lineage: insert_b_better.name SIMPLE [(student_10_lines)student_10_lines.FieldSchema(name:name, type:string, comment:null), ] +name age gpa _c3 _c4 _c5 _c6 _c7 _c8 +PREHOOK: query: select * from 
insert_b_better +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_b_better +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_b_better +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_b_better +#### A masked pattern was here #### +insert_b_better.name insert_b_better.age insert_b_better.gpa insert_b_better.a insert_b_better.b insert_b_better.c insert_b_better.d insert_b_better.e insert_b_better.f +George 22 3.8 NULL 2011-01-01 01:01:01 George NULL 6 7.6 +NULL NULL NULL NULL 2011-01-01 01:01:01 NULL NULL NULL NULL +calvin brown 28 2.7 NULL 2011-01-01 01:01:01 NULL calvin brown 12 NULL +luke brown 60 1.14 60 NULL NULL luke brown NULL 2.28 +luke king 28 0.47 NULL 2011-01-01 01:01:01 NULL luke king 9 0.94 +nick johnson 34 NULL NULL 2011-01-01 01:01:01 NULL nick johnson 12 NULL +oscar thompson 35 2.98 NULL 2011-01-01 01:01:01 NULL oscar thompson 14 NULL +priscilla falkner 55 1.16 55 NULL NULL priscilla falkner NULL NULL +quinn ovid 19 NULL NULL 2011-01-01 01:01:01 NULL quinn ovid 10 NULL +tom thompson 42 0.53 42 NULL NULL tom thompson NULL NULL +ulysses garcia 35 2.74 NULL 2011-01-01 01:01:01 NULL ulysses garcia 14 NULL +xavier garcia 33 1.06 NULL 2011-01-01 01:01:01 NULL xavier garcia 13 NULL diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 8d3f163..de30ca7 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -51,13 +51,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 6] - selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int + projectedOutputColumnNums: [0, 7] + 
selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:int) -> bigint + aggregators: VectorUDAFCount(col 7:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 9143816..19d9e39 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 
15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -97,7 +97,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -217,8 +217,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string + projectedOutputColumnNums: [1, 18, 24] + 
selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -246,7 +246,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -594,7 +594,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: 
VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -674,8 +674,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -703,7 +703,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -755,8 +755,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, 
DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -784,7 +784,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -855,7 +855,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -936,7 +936,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: 
false @@ -1017,7 +1017,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 5380c9c..39d3bbe 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -68,8 +68,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, 
IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -96,7 +96,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -216,8 +216,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string + 
projectedOutputColumnNums: [1, 18, 24] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -244,7 +244,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -588,7 +588,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 
5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -667,8 +667,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -695,7 +695,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -747,8 +747,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 
3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -775,7 +775,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -846,7 +846,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -926,7 +926,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: 
false @@ -1006,7 +1006,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/vector_case_when_1.q.out ql/src/test/results/clientpositive/vector_case_when_1.q.out new file mode 100644 index 0000000..f80f477 --- /dev/null +++ ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -0,0 +1,1126 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, 
comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 
THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS 
Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN 
((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), 
_col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 
0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 
45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN 
('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, 
IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: 
DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, col 7:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, col 7:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 35:decimal(10,2), 
IfExprTimestampColumnColumn(col 19:boolean, col 36:timestampcol 37:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 19:boolean, CastDateToTimestamp(col 12:date) -> 36:timestamp, CastDateToTimestamp(col 11:date) -> 37:timestamp) -> 38:timestamp, IfExprColumnNull(col 19:boolean, col 39:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 19:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 39:int) -> 40:int, IfExprNullColumn(col 41:boolean, null, col 42)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 41:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 42:int) -> 43:int, IfExprLongScalarLongScalar(col 45:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 44:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 44:int) -> 45:boolean) -> 44:date + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, double, double, double, decimal(10,2), decimal(10,2), decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 
5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 
+15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 
39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 
0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 
45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 
NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. 
+ IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2), 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN 
('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END (type: double), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 
19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, 
VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 7)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, col 7:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 7:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, col 7:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val 
DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2), VectorUDFAdaptor(if((UDFToString(l_shipinstruct) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 70:decimal(10,2), IfExprCondExprCondExpr(col 66:boolean, col 71:timestampcol 72:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 66:boolean, CastDateToTimestamp(col 12:date) -> 71:timestamp, CastDateToTimestamp(col 11:date) -> 72:timestamp) -> 73:timestamp, IfExprCondExprNull(col 74:boolean, col 75:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 74:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 75:int) -> 76:int, IfExprNullCondExpr(col 77:boolean, null, col 78:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 77:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 78:int) -> 79:int, IfExprLongScalarLongScalar(col 81:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 80:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 80:int) -> 81:boolean) -> 80:date + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), 
_col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2), l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2), decimal(10,2), timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: 
string), VALUE._col2 (type: string), VALUE._col3 (type: date), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: decimal(10,2)), VALUE._col7 (type: decimal(10,2)), VALUE._col8 (type: decimal(12,2)), VALUE._col9 (type: decimal(12,2)), VALUE._col10 (type: decimal(10,2)), VALUE._col11 (type: decimal(10,2)), VALUE._col12 (type: timestamp), VALUE._col13 (type: int), VALUE._col14 (type: int), VALUE._col15 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 78920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT + L_QUANTITY as Quantity, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE "Huge number" END AS Quantity_Description, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN "Many" + ELSE NULL END AS Quantity_Description_2, + CASE + WHEN L_QUANTITY = 1 THEN "Single" + WHEN L_QUANTITY = 2 THEN "Two" + WHEN L_QUANTITY < 10 THEN "Some" + WHEN L_QUANTITY < 100 THEN NULL + ELSE NULL END AS Quantity_Description_3, + IF(L_SHIPMODE = "SHIP", DATE_ADD(l_shipdate, 10), DATE_ADD(l_shipdate, 5)) AS Expected_Date, + CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE 0 END AS Field_1, -- The 0 will be an integer and requires implicit casting. 
+ CASE WHEN L_RETURNFLAG = "N" + THEN l_extendedprice * (1 - l_discount) + ELSE CAST(0 AS DOUBLE) END AS Field_2, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", NULL, L_TAX) AS Field_3, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, NULL) AS Field_4, + -- For the next 2 IF stmts, the 0s are integer and require implicit casting to decimal. + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0, L_TAX) AS Field_5, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0) AS Field_6, + IF(L_SHIPINSTRUCT = "DELIVER IN PERSON", 0BD, L_TAX) AS Field_7, + IF(L_SHIPINSTRUCT = "TAKE BACK RETURN", L_TAX, 0BD) AS Field_8, + IF(L_PARTKEY > 30, CAST(L_RECEIPTDATE AS TIMESTAMP), CAST(L_COMMITDATE AS TIMESTAMP)) AS Field_9, + IF(L_SUPPKEY > 10000, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE), NULL) AS Field_10, + IF(L_SUPPKEY > 10000, NULL, DATEDIFF(L_RECEIPTDATE, L_COMMITDATE)) AS Field_11, + IF(L_SUPPKEY % 500 > 100, DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365)) AS Field_12 +FROM lineitem_test +ORDER BY Quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +quantity quantity_description quantity_description_2 quantity_description_3 expected_date field_1 field_2 field_3 field_4 field_5 field_6 field_7 field_8 field_9 field_10 field_11 field_12 +NULL Huge number NULL NULL NULL 0.0 0.0 NULL NULL NULL 0.00 NULL 0.00 NULL NULL NULL 2009-12-31 +1 Single Single Single 1994-12-06 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-12-15 00:00:00 NULL 3 2009-01-01 +1 Single Single Single 1994-01-31 0.0 0.0 0.05 0.05 0.05 0.05 0.05 0.05 1994-01-28 00:00:00 NULL -36 2009-01-01 +2 Two Two Two 1995-08-12 2011.3912000000003 2011.3912000000003 NULL NULL 0.00 0.00 0.00 0.00 1995-08-23 00:00:00 NULL -45 2009-01-01 +2 Two Two Two 1993-12-09 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-01-01 00:00:00 NULL -6 2009-01-01 +3 Some Some Some 1994-06-11 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-06-15 00:00:00 NULL -42 2009-12-31 +3 Some Some Some 1998-06-02 5137.6143 
5137.6143 0.07 NULL 0.07 0.00 0.07 0.00 1998-06-02 00:00:00 NULL 60 2009-01-01 +3 Some Some Some 1998-07-09 2778.921 2778.921 0.02 NULL 0.02 0.00 0.02 0.00 1998-07-21 00:00:00 NULL 46 2009-12-31 +4 Some Some Some 1995-08-09 5990.4936 5990.4936 0.03 NULL 0.03 0.00 0.03 0.00 1995-09-03 00:00:00 NULL -28 2009-01-01 +4 Some Some Some 1997-04-27 5669.7732000000005 5669.7732000000005 0.04 NULL 0.04 0.00 0.04 0.00 1997-04-20 00:00:00 NULL 79 2009-01-01 +5 Some Some Some 1997-02-25 8116.96 8116.96 NULL NULL 0.00 0.00 0.00 0.00 1997-02-21 00:00:00 NULL 9 2009-01-01 +5 Some Some Some 1996-02-15 6217.103999999999 6217.103999999999 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-13 00:00:00 NULL -42 2009-01-01 +5 Some Some Some 1993-12-14 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1993-12-23 00:00:00 NULL -2 2009-01-01 +6 Some Some Some 1998-11-04 9487.6152 9487.6152 0.06 NULL 0.06 0.00 0.06 0.00 1998-11-05 00:00:00 NULL 46 2009-12-31 +6 Some Some Some 1995-07-26 8793.2736 8793.2736 0.03 NULL 0.03 0.00 0.03 0.00 1995-07-25 00:00:00 NULL -60 2009-01-01 +7 Some Some Some 1996-01-24 12613.136199999999 12613.136199999999 0.04 NULL 0.04 0.00 0.04 0.00 1996-01-29 00:00:00 NULL 38 2009-01-01 +8 Some Some Some 1994-01-17 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-01-14 00:00:00 NULL -44 2009-01-01 +8 Some Some Some 1996-02-03 11978.640000000001 11978.640000000001 0.02 0.02 0.02 0.02 0.02 0.02 1996-01-31 00:00:00 NULL -34 2009-01-01 +9 Some Some Some 1996-02-11 10666.6272 10666.6272 0.08 0.08 0.08 0.08 0.08 0.08 1996-02-19 00:00:00 NULL -12 2009-01-01 +11 Many Many NULL 1994-03-22 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1994-03-27 00:00:00 NULL 10 2009-01-01 +12 Many Many NULL 1996-05-12 12655.998 12655.998 0.03 0.03 0.03 0.03 0.03 0.03 1996-06-03 00:00:00 NULL 82 2009-01-01 +12 Many Many NULL 1997-02-01 12156.034800000001 12156.034800000001 0.05 NULL 0.05 0.00 0.05 0.00 1997-02-22 00:00:00 NULL 1 2009-01-01 +13 Many Many NULL 1994-03-08 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-03-26 00:00:00 NULL 41 
2009-01-01 +13 Many Many NULL 1998-10-28 17554.68 17554.68 0.07 NULL 0.07 0.00 0.07 0.00 1998-11-06 00:00:00 NULL 53 2009-01-01 +13 Many Many NULL 1993-04-06 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1993-04-08 00:00:00 NULL 4 2009-01-01 +14 Many Many NULL 1995-01-04 0.0 0.0 0.02 NULL 0.02 0.00 0.02 0.00 1995-01-27 00:00:00 NULL 66 2009-01-01 +15 Many Many NULL 1994-11-05 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-11-20 00:00:00 NULL 81 2009-12-31 +17 Many Many NULL 1996-03-18 20321.500799999998 20321.500799999998 NULL NULL 0.00 0.00 0.00 0.00 1996-03-22 00:00:00 NULL 39 2009-01-01 +17 Many Many NULL 1994-07-07 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-07-03 00:00:00 NULL -4 2009-01-01 +19 Many Many NULL 1993-05-19 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1993-05-25 00:00:00 NULL 81 2009-01-01 +19 Many Many NULL 1994-02-05 0.0 0.0 0.03 0.03 0.03 0.03 0.03 0.03 1994-02-06 00:00:00 NULL -11 2009-01-01 +20 Many Many NULL 1998-07-02 32042.592 32042.592 0.01 NULL 0.01 0.00 0.01 0.00 1998-07-02 00:00:00 NULL 40 2009-01-01 +21 Many Many NULL 1995-07-11 24640.0518 24640.0518 NULL NULL 0.00 0.00 0.00 0.00 1995-07-31 00:00:00 NULL 78 2009-01-01 +21 Many Many NULL 1994-10-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-10-26 00:00:00 NULL 38 2009-01-01 +22 Many Many NULL 1998-10-14 28405.0184 28405.0184 0.06 NULL 0.06 0.00 0.06 0.00 1998-10-12 00:00:00 NULL -4 2009-01-01 +22 Many Many NULL 1995-07-22 39353.82 39353.82 0.05 NULL 0.05 0.00 0.05 0.00 1995-07-19 00:00:00 NULL 45 2009-01-01 +23 Many Many NULL 1997-04-24 33946.3785 33946.3785 NULL NULL 0.00 0.00 0.00 0.00 1997-05-06 00:00:00 NULL 81 2009-01-01 +23 Many Many NULL 1994-10-13 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-10-24 00:00:00 NULL 79 2009-12-31 +23 Many Many NULL 1994-07-24 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-07-25 00:00:00 NULL 26 2009-01-01 +24 Many Many NULL 1996-04-04 20542.032 20542.032 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-01 00:00:00 NULL 18 2009-12-31 +24 Many Many NULL 1996-02-26 31762.584 31762.584 
0.00 0.00 0.00 0.00 0.00 0.00 1996-03-18 00:00:00 NULL 75 2009-01-01 +25 Many Many NULL 1998-04-15 43064.1575 43064.1575 0.07 NULL 0.07 0.00 0.07 0.00 1998-04-11 00:00:00 NULL -11 2009-01-01 +25 Many Many NULL 1995-12-06 27263.995 27263.995 NULL NULL 0.00 0.00 0.00 0.00 1995-12-21 00:00:00 NULL -4 2009-01-01 +26 Many Many NULL 1996-11-09 39912.433600000004 39912.433600000004 0.04 NULL 0.04 0.00 0.04 0.00 1996-11-20 00:00:00 NULL 31 2009-01-01 +26 Many Many NULL 1995-04-25 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1995-05-13 00:00:00 NULL 18 2009-01-01 +26 Many Many NULL 1994-10-21 0.0 0.0 0.08 NULL 0.08 0.00 0.08 0.00 1994-10-19 00:00:00 NULL 24 2009-01-01 +26 Many Many NULL 1993-11-03 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1993-11-04 00:00:00 NULL -44 2009-01-01 +27 Many Many NULL 1994-01-26 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-01-23 00:00:00 NULL 62 2009-01-01 +27 Many Many NULL 1998-06-29 45590.2425 45590.2425 NULL NULL 0.00 0.00 0.00 0.00 1998-06-29 00:00:00 NULL 4 2009-01-01 +28 Many Many NULL 1995-10-28 44866.219999999994 44866.219999999994 0.08 0.08 0.08 0.08 0.08 0.08 1995-10-26 00:00:00 NULL 60 2009-01-01 +28 Many Many NULL 1994-12-29 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1995-01-16 00:00:00 NULL 83 2009-01-01 +28 Many Many NULL 1996-04-26 26349.6324 26349.6324 0.06 NULL 0.06 0.00 0.06 0.00 1996-05-16 00:00:00 NULL 47 2009-01-01 +28 Many Many NULL 1996-03-26 30855.6612 30855.6612 0.04 NULL 0.04 0.00 0.04 0.00 1996-04-20 00:00:00 NULL 12 2009-12-31 +28 Many Many NULL 1993-12-19 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1994-01-01 00:00:00 NULL -9 2009-01-01 +28 Many Many NULL 1996-02-06 45975.3616 45975.3616 0.02 NULL 0.02 0.00 0.02 0.00 1996-02-28 00:00:00 NULL 66 2009-01-01 +29 Many Many NULL 1997-01-30 39341.806 39341.806 NULL NULL 0.00 0.00 0.00 0.00 1997-01-27 00:00:00 NULL 0 2009-01-01 +30 Many Many NULL 1994-06-08 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-06-22 00:00:00 NULL 24 2009-01-01 +30 Many Many NULL 1996-01-15 29770.173 29770.173 NULL NULL 0.00 
0.00 0.00 0.00 1996-01-18 00:00:00 NULL 35 2009-12-31 +30 Many Many NULL 1998-08-16 44561.46 44561.46 0.06 NULL 0.06 0.00 0.06 0.00 1998-08-14 00:00:00 NULL 34 2009-12-31 +31 Many Many NULL 1994-02-24 0.0 0.0 0.08 0.08 0.08 0.08 0.08 0.08 1994-02-20 00:00:00 NULL -19 2009-01-01 +31 Many Many NULL 1993-11-03 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1993-11-08 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1995-08-19 63313.3312 63313.3312 0.00 NULL 0.00 0.00 0.00 0.00 1995-08-27 00:00:00 NULL -41 2009-01-01 +32 Many Many NULL 1993-12-14 0.0 0.0 0.05 NULL 0.05 0.00 0.05 0.00 1993-12-28 00:00:00 NULL -7 2009-12-31 +32 Many Many NULL 1994-08-29 0.0 0.0 0.06 NULL 0.06 0.00 0.06 0.00 1994-08-31 00:00:00 NULL 14 2009-01-01 +32 Many Many NULL 1996-10-07 44955.15839999999 44955.15839999999 0.05 NULL 0.05 0.00 0.05 0.00 1996-10-14 00:00:00 NULL -66 2009-12-31 +32 Many Many NULL 1996-02-04 46146.7488 46146.7488 NULL NULL 0.00 0.00 0.00 0.00 1996-02-03 00:00:00 NULL -4 2009-01-01 +33 Many Many NULL 1998-04-17 54174.12 54174.12 0.01 NULL 0.01 0.00 0.01 0.00 1998-04-15 00:00:00 NULL 26 2009-01-01 +34 Many Many NULL 1998-03-10 56487.763199999994 56487.763199999994 NULL NULL 0.00 0.00 0.00 0.00 1998-03-30 00:00:00 NULL -23 2009-01-01 +34 Many Many NULL 1996-01-27 63982.002400000005 63982.002400000005 NULL NULL 0.00 0.00 0.00 0.00 1996-01-27 00:00:00 NULL 21 2009-01-01 +34 Many Many NULL 1995-11-13 60586.5448 60586.5448 0.06 NULL 0.06 0.00 0.06 0.00 1995-11-26 00:00:00 NULL -50 2009-01-01 +35 Many Many NULL 1996-01-21 40475.225 40475.225 0.03 0.03 0.03 0.03 0.03 0.03 1996-01-22 00:00:00 NULL -32 2009-01-01 +36 Many Many NULL 1996-04-17 41844.6756 41844.6756 0.06 0.06 0.06 0.06 0.06 0.06 1996-04-20 00:00:00 NULL 52 2009-01-01 +37 Many Many NULL 1994-02-18 0.0 0.0 0.04 NULL 0.04 0.00 0.04 0.00 1994-02-21 00:00:00 NULL -23 2009-01-01 +37 Many Many NULL 1993-04-23 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1993-04-14 00:00:00 NULL 15 2009-12-31 +37 Many Many NULL 1992-05-02 0.0 0.0 0.03 0.03 
0.03 0.03 0.03 0.03 1992-05-02 00:00:00 NULL -13 2009-01-01 +38 Many Many NULL 1997-02-02 44694.46 44694.46 0.05 0.05 0.05 0.05 0.05 0.05 1997-02-02 00:00:00 NULL 19 2009-01-01 +38 Many Many NULL 1996-02-16 68028.3144 68028.3144 NULL NULL 0.00 0.00 0.00 0.00 1996-02-18 00:00:00 NULL -6 2009-01-01 +39 Many Many NULL 1998-02-03 45146.01 45146.01 NULL NULL 0.00 0.00 0.00 0.00 1998-02-18 00:00:00 NULL -48 2009-01-01 +39 Many Many NULL 1992-07-07 0.0 0.0 0.02 0.02 0.02 0.02 0.02 0.02 1992-07-28 00:00:00 NULL -21 2009-01-01 +40 Many Many NULL 1996-12-13 51224.736 51224.736 0.05 NULL 0.05 0.00 0.05 0.00 1997-01-01 00:00:00 NULL 71 2009-01-01 +40 Many Many NULL 1992-07-26 0.0 0.0 0.03 NULL 0.03 0.00 0.03 0.00 1992-08-15 00:00:00 NULL 14 2009-01-01 +41 Many Many NULL 1998-07-04 47989.6144 47989.6144 0.08 NULL 0.08 0.00 0.08 0.00 1998-07-06 00:00:00 NULL 9 2009-01-01 +41 Many Many NULL 1994-02-26 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-03-18 00:00:00 NULL 17 2009-01-01 +41 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-11 00:00:00 NULL -74 2009-01-01 +42 Many Many NULL 1994-08-05 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-28 00:00:00 NULL 33 2009-12-31 +42 Many Many NULL 1996-02-13 68289.9672 68289.9672 0.00 NULL 0.00 0.00 0.00 0.00 1996-02-23 00:00:00 NULL 33 2009-01-01 +43 Many Many NULL 1996-10-22 62727.3207 62727.3207 0.01 NULL 0.01 0.00 0.01 0.00 1996-10-26 00:00:00 NULL -19 2009-12-31 +43 Many Many NULL 1992-07-15 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1992-08-02 00:00:00 NULL 27 2009-01-01 +44 Many Many NULL 1997-03-23 60781.124800000005 60781.124800000005 NULL NULL 0.00 0.00 0.00 0.00 1997-04-13 00:00:00 NULL 74 2009-12-31 +44 Many Many NULL 1996-10-04 80882.4192 80882.4192 0.02 NULL 0.02 0.00 0.02 0.00 1996-09-30 00:00:00 NULL -48 2009-01-01 +44 Many Many NULL 1995-09-02 75106.658 75106.658 NULL NULL 0.00 0.00 0.00 0.00 1995-09-14 00:00:00 NULL 25 2009-01-01 +44 Many Many NULL 1996-11-19 48941.692800000004 48941.692800000004 0.06 NULL 
0.06 0.00 0.06 0.00 1996-12-12 00:00:00 NULL -3 2009-01-01 +45 Many Many NULL 1998-03-05 61489.35 61489.35 NULL NULL 0.00 0.00 0.00 0.00 1998-03-24 00:00:00 NULL 4 2009-01-01 +45 Many Many NULL 1994-02-07 0.0 0.0 0.00 NULL 0.00 0.00 0.00 0.00 1994-02-23 00:00:00 NULL 50 2009-01-01 +46 Many Many NULL 1996-01-20 73475.892 73475.892 0.07 NULL 0.07 0.00 0.07 0.00 1996-02-03 00:00:00 NULL -53 2009-01-01 +46 Many Many NULL 1998-07-01 56583.5144 56583.5144 0.05 NULL 0.05 0.00 0.05 0.00 1998-07-05 00:00:00 NULL 28 2009-01-01 +46 Many Many NULL 1998-08-18 84565.5168 84565.5168 0.05 NULL 0.05 0.00 0.05 0.00 1998-08-29 00:00:00 NULL 52 2009-01-01 +46 Many Many NULL 1996-10-01 77781.4092 77781.4092 NULL NULL 0.00 0.00 0.00 0.00 1996-10-26 00:00:00 NULL -54 2009-01-01 +48 Many Many NULL 1994-08-22 0.0 0.0 0.07 NULL 0.07 0.00 0.07 0.00 1994-09-08 00:00:00 NULL 28 2009-01-01 +49 Many Many NULL 1993-11-14 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 1993-11-24 00:00:00 NULL -26 2009-12-31 +50 Many Many NULL 1994-08-13 0.0 0.0 NULL NULL 0.00 0.00 0.00 0.00 1994-08-26 00:00:00 NULL -48 2009-12-31 diff --git ql/src/test/results/clientpositive/vector_case_when_2.q.out ql/src/test/results/clientpositive/vector_case_when_2.q.out new file mode 100644 index 0000000..7b09638 --- /dev/null +++ ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -0,0 +1,806 @@ +PREHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps_txt +POSTHOOK: query: create table timestamps_txt (tsval timestamp) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@timestamps_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/timestamps.txt' OVERWRITE INTO TABLE timestamps_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@timestamps_txt +PREHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps +POSTHOOK: query: create table timestamps (cdate date, ctimestamp1 timestamp, stimestamp1 string, ctimestamp2 timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps +PREHOOK: query: insert overwrite table timestamps + select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps_txt +PREHOOK: Output: default@timestamps +POSTHOOK: query: insert overwrite table timestamps + select cast(tsval as date), tsval, cast(tsval as string), tsval - '1 2:3:4' day to second from timestamps_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps_txt +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.cdate EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.ctimestamp1 SIMPLE [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION [(timestamps_txt)timestamps_txt.FieldSchema(name:tsval, type:timestamp, comment:null), ] +tsval tsval _c2 _c3 +PREHOOK: query: INSERT INTO TABLE timestamps VALUES (NULL,NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@timestamps +POSTHOOK: query: INSERT INTO TABLE timestamps VALUES 
(NULL,NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.cdate EXPRESSION [] +POSTHOOK: Lineage: timestamps.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: timestamps.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: timestamps.stimestamp1 EXPRESSION [] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, 
minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, 
minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 
23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unexpected primitive type category VOID + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 
(type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 
17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + 
IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 
1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 
+2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 
6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp 
'2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 
23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN 
TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: 
TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 6:boolean, 
IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN 
stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN 
stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 
1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 +2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 
2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 
6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= 
timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp 
'2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: timestamps + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] + Select Operator + expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00.0')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00.0')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= 
TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00.0')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN (null) ELSE (null) END (type: string), if((TIMESTAMP'1974-10-04 17:21:03.989' > ctimestamp1), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00.0) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 
6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00.0) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 
34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00.0) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00.0' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampScalarGreaterTimestampColumn(val 1974-10-04 17:21:03.989, col 1:timestamp) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59.0') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 
1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: cdate:date, ctimestamp1:timestamp, stimestamp1:string, ctimestamp2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, bigint, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 12384 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < 
timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: SELECT + ctimestamp1, + ctimestamp2, + CASE + WHEN ctimestamp2 <= date '1800-12-31' THEN "1800s or Earlier" + WHEN ctimestamp2 < date '1900-01-01' THEN "1900s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE "Unknown" END AS ctimestamp2_Description, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN "Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between cast('2006-01-01 00:00:00.000' as timestamp) and cast('2010-12-31 23:59:59.999999999' as timestamp) THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN "Early 2010s" + ELSE NULL END AS ctimestamp2_Description_2, + CASE + WHEN ctimestamp2 <= timestamp '2000-12-31 23:59:59.999999999' THEN 
"Old" + WHEN ctimestamp2 < timestamp '2006-01-01 00:00:00.000' THEN "Early 2000s" + WHEN ctimestamp2 between timestamp '2006-01-01 00:00:00.000' and timestamp '2010-12-31 23:59:59.999999999' THEN "Late 2000s" + WHEN ctimestamp2 <= timestamp '2015-12-31 23:59:59.999999999' THEN NULL + ELSE NULL END AS ctimestamp2_Description_3, + IF(timestamp '1974-10-04 17:21:03.989' > ctimestamp1, year(ctimestamp1), year(ctimestamp2)) AS field1, + CASE WHEN stimestamp1 LIKE '%19%' + THEN stimestamp1 + ELSE timestamp '2018-03-08 23:04:59' END AS Field_2, + IF(ctimestamp1 = timestamp '2021-09-24 03:18:32.413655165' , NULL, minute(ctimestamp1)) AS Field_3, + IF(ctimestamp2 >= timestamp '5344-10-04 18:40:08.165' and ctimestamp2 < timestamp '6631-11-13 16:31:29.702202248', minute(ctimestamp1), NULL) AS Field_4, + IF(cast(ctimestamp1 as double) % 500 > 100, DATE_ADD(cdate, 1), DATE_ADD(cdate, 365)) AS Field_5 +FROM timestamps +ORDER BY ctimestamp1, stimestamp1, ctimestamp2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +ctimestamp1 ctimestamp2 ctimestamp2_description ctimestamp2_description_2 ctimestamp2_description_3 field1 field_2 field_3 field_4 field_5 +NULL NULL Unknown NULL NULL NULL 2018-03-08 23:04:59 NULL NULL NULL +0004-09-22 18:26:29.519542222 0004-09-21 16:23:25.519542222 1800s or Earlier Old Old 4 0004-09-22 18:26:29.519542222 26 NULL 0005-09-22 +0528-10-27 08:15:18.941718273 0528-10-26 06:12:14.941718273 1800s or Earlier Old Old 528 2018-03-08 23:04:59 15 NULL 0529-10-27 +1319-02-02 16:31:57.778 1319-02-01 14:28:53.778 1800s or Earlier Old Old 1319 1319-02-02 16:31:57.778 31 NULL 1320-02-02 +1404-07-23 15:32:16.059185026 1404-07-22 13:29:12.059185026 1800s or Earlier Old Old 1404 2018-03-08 23:04:59 32 NULL 1405-07-23 +1815-05-06 00:12:37.543584705 1815-05-04 22:09:33.543584705 1900s Old Old 1815 2018-03-08 23:04:59 12 NULL 1816-05-05 +1883-04-17 04:14:34.647766229 1883-04-16 02:11:30.647766229 1900s Old Old 1883 2018-03-08 
23:04:59 14 NULL 1884-04-16 +1966-08-16 13:36:50.183618031 1966-08-15 11:33:46.183618031 Early 2010s Old Old 1966 1966-08-16 13:36:50.183618031 36 NULL 1967-08-16 +1973-04-17 06:30:38.596784156 1973-04-16 04:27:34.596784156 Early 2010s Old Old 1973 1973-04-17 06:30:38.596784156 30 NULL 1974-04-17 +1974-10-04 17:21:03.989 1974-10-03 15:17:59.989 Early 2010s Old Old 1974 1974-10-04 17:21:03.989 21 NULL 1974-10-05 +1976-03-03 04:54:33.000895162 1976-03-02 02:51:29.000895162 Early 2010s Old Old 1976 1976-03-03 04:54:33.000895162 54 NULL 1976-03-04 +1976-05-06 00:42:30.910786948 1976-05-04 22:39:26.910786948 Early 2010s Old Old 1976 1976-05-06 00:42:30.910786948 42 NULL 1977-05-06 +1978-08-05 14:41:05.501 1978-08-04 12:38:01.501 Early 2010s Old Old 1978 1978-08-05 14:41:05.501 41 NULL 1978-08-06 +1981-04-25 09:01:12.077192689 1981-04-24 06:58:08.077192689 Early 2010s Old Old 1981 1981-04-25 09:01:12.077192689 1 NULL 1982-04-25 +1981-11-15 23:03:10.999338387 1981-11-14 21:00:06.999338387 Early 2010s Old Old 1981 1981-11-15 23:03:10.999338387 3 NULL 1981-11-16 +1985-07-20 09:30:11 1985-07-19 07:27:07 Early 2010s Old Old 1985 1985-07-20 09:30:11 30 NULL 1986-07-20 +1985-11-18 16:37:54 1985-11-17 14:34:50 Early 2010s Old Old 1985 1985-11-18 16:37:54 37 NULL 1985-11-19 +1987-02-21 19:48:29 1987-02-20 17:45:25 Early 2010s Old Old 1987 1987-02-21 19:48:29 48 NULL 1987-02-22 +1987-05-28 13:52:07.900916635 1987-05-27 11:49:03.900916635 Early 2010s Old Old 1987 1987-05-28 13:52:07.900916635 52 NULL 1988-05-27 +1998-10-16 20:05:29.397591987 1998-10-15 18:02:25.397591987 Early 2010s Old Old 1998 1998-10-16 20:05:29.397591987 5 NULL 1999-10-16 +1999-10-03 16:59:10.396903939 1999-10-02 14:56:06.396903939 Early 2010s Old Old 1999 1999-10-03 16:59:10.396903939 59 NULL 1999-10-04 +2000-12-18 08:42:30.000595596 2000-12-17 06:39:26.000595596 Early 2010s Old Old 2000 2018-03-08 23:04:59 42 NULL 2000-12-19 +2002-05-10 05:29:48.990818073 2002-05-09 03:26:44.990818073 Early 2010s Early 2000s 
Early 2000s 2002 2018-03-08 23:04:59 29 NULL 2002-05-11 +2003-09-23 22:33:17.00003252 2003-09-22 20:30:13.00003252 Early 2010s Early 2000s Early 2000s 2003 2018-03-08 23:04:59 33 NULL 2004-09-22 +2004-03-07 20:14:13 2004-03-06 18:11:09 Early 2010s Early 2000s Early 2000s 2004 2018-03-08 23:04:59 14 NULL 2004-03-08 +2007-02-09 05:17:29.368756876 2007-02-08 03:14:25.368756876 Late 2000s Late 2000s Late 2000s 2007 2018-03-08 23:04:59 17 NULL 2008-02-09 +2009-01-21 10:49:07.108 2009-01-20 08:46:03.108 Late 2000s Late 2000s Late 2000s 2009 2018-03-08 23:04:59 49 NULL 2009-01-22 +2010-04-08 02:43:35.861742727 2010-04-07 00:40:31.861742727 Late 2000s Late 2000s Late 2000s 2010 2018-03-08 23:04:59 43 NULL 2010-04-09 +2013-04-07 02:44:43.00086821 2013-04-06 00:41:39.00086821 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 44 NULL 2013-04-08 +2013-04-10 00:43:46.854731546 2013-04-08 22:40:42.854731546 Early 2010s Early 2010s NULL 2013 2018-03-08 23:04:59 43 NULL 2013-04-11 +2021-09-24 03:18:32.413655165 2021-09-23 01:15:28.413655165 Unknown NULL NULL 2021 2018-03-08 23:04:59 NULL NULL 2021-09-25 +2024-11-11 16:42:41.101 2024-11-10 14:39:37.101 Unknown NULL NULL 2024 2018-03-08 23:04:59 42 NULL 2024-11-12 +4143-07-08 10:53:27.252802259 4143-07-07 08:50:23.252802259 Unknown NULL NULL 4143 2018-03-08 23:04:59 53 NULL 4143-07-09 +4966-12-04 09:30:55.202 4966-12-03 07:27:51.202 Unknown NULL NULL 4966 2018-03-08 23:04:59 30 NULL 4966-12-05 +5339-02-01 14:10:01.085678691 5339-01-31 12:06:57.085678691 Unknown NULL NULL 5339 2018-03-08 23:04:59 10 NULL 5340-02-01 +5344-10-04 18:40:08.165 5344-10-03 16:37:04.165 Unknown NULL NULL 5344 2018-03-08 23:04:59 40 NULL 5344-10-05 +5397-07-13 07:12:32.000896438 5397-07-12 05:09:28.000896438 Unknown NULL NULL 5397 2018-03-08 23:04:59 12 12 5397-07-14 +5966-07-09 03:30:50.597 5966-07-08 01:27:46.597 Unknown NULL NULL 5966 2018-03-08 23:04:59 30 30 5967-07-09 +6229-06-28 02:54:28.970117179 6229-06-27 00:51:24.970117179 Unknown NULL NULL 
6229 2018-03-08 23:04:59 54 54 6230-06-28 +6482-04-27 12:07:38.073915413 6482-04-26 10:04:34.073915413 Unknown NULL NULL 6482 2018-03-08 23:04:59 7 7 6482-04-28 +6631-11-13 16:31:29.702202248 6631-11-12 14:28:25.702202248 Unknown NULL NULL 6631 2018-03-08 23:04:59 31 31 6631-11-14 +6705-09-28 18:27:28.000845672 6705-09-27 16:24:24.000845672 Unknown NULL NULL 6705 2018-03-08 23:04:59 27 NULL 6705-09-29 +6731-02-12 08:12:48.287783702 6731-02-11 06:09:44.287783702 Unknown NULL NULL 6731 2018-03-08 23:04:59 12 NULL 6731-02-13 +7160-12-02 06:00:24.81200852 7160-12-01 03:57:20.81200852 Unknown NULL NULL 7160 2018-03-08 23:04:59 0 NULL 7161-12-02 +7409-09-07 23:33:32.459349602 7409-09-06 21:30:28.459349602 Unknown NULL NULL 7409 2018-03-08 23:04:59 33 NULL 7409-09-08 +7503-06-23 23:14:17.486 7503-06-22 21:11:13.486 Unknown NULL NULL 7503 2018-03-08 23:04:59 14 NULL 7503-06-24 +8422-07-22 03:21:45.745036084 8422-07-21 01:18:41.745036084 Unknown NULL NULL 8422 2018-03-08 23:04:59 21 NULL 8422-07-23 +8521-01-16 20:42:05.668832388 8521-01-15 18:39:01.668832388 Unknown NULL NULL 8521 2018-03-08 23:04:59 42 NULL 8521-01-17 +9075-06-13 16:20:09.218517797 9075-06-12 14:17:05.218517797 Unknown NULL NULL 9075 2018-03-08 23:04:59 20 NULL 9075-06-14 +9209-11-11 04:08:58.223768453 9209-11-10 02:05:54.223768453 Unknown NULL NULL 9209 2018-03-08 23:04:59 8 NULL 9209-11-12 +9403-01-09 18:12:33.547 9403-01-08 16:09:29.547 Unknown NULL NULL 9403 2018-03-08 23:04:59 12 NULL 9403-01-10 diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index d7cc0b7..13fb6d1 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -45,13 +45,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 6] - selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 
5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int + projectedOutputColumnNums: [0, 7] + selectExpressions: IfExprColumnCondExpr(col 1:boolean, col 3:intcol 6:int)(children: col 1:boolean, ConstantVectorExpression(val 1) -> 3:int, IfExprColumnNull(col 4:boolean, col 5:int, null)(children: NotCol(col 1:boolean) -> 4:boolean, ConstantVectorExpression(val 0) -> 5:int) -> 6:int) -> 7:int Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:int) -> bigint + aggregators: VectorUDAFCount(col 7:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 428781f..74b4426 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -65,8 +65,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 16, 17] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 16:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 15:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprStringScalarStringScalar(col 14:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean) -> 15:string) -> 17:string + projectedOutputColumnNums: [1, 17, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 
14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -93,7 +93,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -210,8 +210,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 17, 20] - selectExpressions: IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprColumnNull(col 14:boolean, col 15:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 14:boolean, ConstantVectorExpression(val b) -> 15:string) -> 16:string) -> 17:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val acol 19:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, IfExprNullColumn(col 18:boolean, null, col 16)(children: 
LongColEqualLongScalar(col 1:smallint, val 12205) -> 18:boolean, ConstantVectorExpression(val c) -> 16:string) -> 19:string) -> 20:string + projectedOutputColumnNums: [1, 18, 24] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -238,7 +238,7 @@ STAGE PLANS: includeColumns: [1] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] + scratchColumnTypeNames: [bigint, string, bigint, string, string, string, bigint, string, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -525,7 +525,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 
1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:decimal(11,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -601,8 +601,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 4:decimal(1,0)col 5:decimal(11,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:decimal(1,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -629,7 +629,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(1,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -678,8 +678,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0) + projectedOutputColumnNums: [6] 
+ selectExpressions: IfExprCondExprColumn(col 3:boolean, col 4:decimal(11,0), col 5:decimal(1,0))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), ConstantVectorExpression(val 2) -> 5:decimal(1,0)) -> 6:decimal(11,0) Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -706,7 +706,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: member:decimal(10,0), attr:decimal(10,0) partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(1,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -774,7 +774,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6] - selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint + selectExpressions: IfExprCondExprCondExpr(col 3:boolean, col 4:bigintcol 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -851,7 +851,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprNullCondExpr(col 3:boolean, null, col 4:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 
2) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -928,7 +928,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint + selectExpressions: IfExprCondExprNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false