diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index f1eef14..c41efb1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -310,4 +310,15 @@ public void fill(byte[] value) {
     isRepeating = true;
     setRef(0, value, 0, value.length);
   }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    BytesColumnVector in = (BytesColumnVector) inputVector;
+    setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]);
+  }
+
+  @Override
+  public void init() {
+    initBuffer(0);
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 0a8811f..6b95360 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -142,5 +142,19 @@ protected void flattenPush() {
     preFlattenIsRepeating = isRepeating;
     preFlattenNoNulls = noNulls;
   }
+
+  /**
+   * Set the element in this column vector from the given input vector.
+   */
+  public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);
+
+  /**
+   * Initialize the column vector. This method can be overridden by specific column vector types.
+   * Use this method only if the individual type of the column vector is not known; otherwise it
+   * is preferable to call the type-specific initialization methods.
+   */
+  public void init() {
+    // Do nothing by default
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index d0d8597..8672922 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -76,6 +76,12 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     // TODO Auto-generated method stub
   }
 
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    vector[outElementNum].update(((DecimalColumnVector) inputVector).vector[inputElementNum]);
+    vector[outElementNum].changeScaleDestructive(scale);
+  }
+
   /**
    * Check if the value at position i fits in the available precision,
    * and convert the value to NULL if it does not.
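Review note (illustrative, not part of the patch): the new `setElement`/`init` contract above is what `VectorCoalesce` later in this patch relies on. `setElement` copies a single value between two vectors of the same type, and `init` exists so callers that only hold a `ColumnVector` reference can make the output writable (only `BytesColumnVector` currently needs a real buffer). A minimal sketch of the contract, assuming the patch is applied; the class name and values are made up for illustration:

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class SetElementSketch {
  public static void main(String[] args) {
    LongColumnVector in = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    out.init();             // no-op for primitive vectors; allocates the buffer for BytesColumnVector

    in.isRepeating = true;  // a repeating vector keeps its single value at index 0
    in.vector[0] = 42;

    // Callers must pass inputElementNum = 0 when the input repeats, as VectorCoalesce does.
    out.setElement(5, 0, in);
    System.out.println(out.vector[5]); // prints 42
  }
}
```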
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index cb23129..525b3c5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -141,4 +141,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     }
     flattenNoNulls(selectedInUse, sel, size);
   }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum];
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index aa05b19..f0545fe 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -185,4 +185,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
     }
     flattenNoNulls(selectedInUse, sel, size);
   }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum];
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 7141d63..f69bfc0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -93,10 +93,8 @@
 import org.apache.hadoop.hive.ql.udf.UDFToString;
 import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
 import org.apache.hadoop.hive.serde2.typeinfo.*;
 
 /**
@@ -115,7 +113,6 @@
   //columnName to column position map
   private final Map<String, Integer> columnMap;
   private final int firstOutputColumnIndex;
-  private final Mode operatorMode = Mode.PROJECTION;
 
   public static final Pattern decimalTypePattern = Pattern.compile("decimal.*");
 
@@ -134,6 +131,7 @@
     castExpressionUdfs.add(GenericUDFToUtcTimestamp.class);
     castExpressionUdfs.add(GenericUDFToChar.class);
     castExpressionUdfs.add(GenericUDFToVarchar.class);
+    castExpressionUdfs.add(GenericUDFTimestamp.class);
     castExpressionUdfs.add(UDFToByte.class);
     castExpressionUdfs.add(UDFToBoolean.class);
     castExpressionUdfs.add(UDFToDouble.class);
@@ -354,7 +352,7 @@ private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf,
     List<ExprNodeDesc> childrenWithCasts = new ArrayList<ExprNodeDesc>();
     boolean atleastOneCastNeeded = false;
     for (ExprNodeDesc child : children) {
-      ExprNodeDesc castExpression = getImplicitCastExpression(child, commonType);
+      ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
       if (castExpression != null) {
         atleastOneCastNeeded = true;
         childrenWithCasts.add(castExpression);
@@ -393,10 +391,19 @@ private TypeInfo updatePrecision(TypeInfo inputTypeInfo, DecimalTypeInfo returnT
     return new DecimalTypeInfo(precision, scale);
   }
 
-  private ExprNodeDesc getImplicitCastExpression(ExprNodeDesc child, TypeInfo castType) {
+  /**
+   * The GenericUDFs might need their children's output to be cast to the given castType.
+   * This method returns a cast expression that would achieve the required casting.
+   */
+  private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) {
     TypeInfo inputTypeInfo = child.getTypeInfo();
     String inputTypeString = inputTypeInfo.getTypeName();
     String castTypeString = castType.getTypeName();
+
+    if (inputTypeString.equals(castTypeString)) {
+      // Nothing to be done
+      return null;
+    }
     boolean inputTypeDecimal = false;
     boolean castTypeDecimal = false;
     if (decimalTypePattern.matcher(inputTypeString).matches()) {
@@ -406,72 +413,82 @@ private ExprNodeDesc getImplicitCastExpression(ExprNodeDesc child, TypeInfo cast
       castTypeDecimal = true;
     }
 
-    // If castType is decimal, try not to lose precision for numeric types.
-    if (castTypeDecimal) {
-      castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
-    }
-
     if (castTypeDecimal && !inputTypeDecimal) {
+
+      // Cast the input to decimal
+      // If castType is decimal, try not to lose precision for numeric types.
+      castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
       GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
       List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
       children.add(child);
       ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
       return desc;
     } else if (!castTypeDecimal && inputTypeDecimal) {
+
+      // Cast decimal input to returnType
-      UDF udfClass = null;
-      GenericUDF genericUdf = null;
-      PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
-          ((PrimitiveTypeInfo) castType).getPrimitiveCategory();
-      switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
-      case BYTE:
-        udfClass = new UDFToByte();
-        break;
-      case SHORT:
-        udfClass = new UDFToShort();
-        break;
-      case INT:
-        udfClass = new UDFToInteger();
-        break;
-      case LONG:
-        udfClass = new UDFToLong();
-        break;
-      case FLOAT:
-        udfClass = new UDFToFloat();
-        break;
-      case DOUBLE:
-        udfClass = new UDFToDouble();
-        break;
-      case STRING:
-        udfClass = new UDFToString();
-        break;
-      case BOOLEAN:
-        udfClass = new UDFToBoolean();
-        break;
-      case DATE:
-        genericUdf = new GenericUDFToDate();
-        break;
-      case TIMESTAMP:
-        genericUdf = new GenericUDFToUnixTimeStamp();
-        break;
-      case BINARY:
-        genericUdf = new GenericUDFToBinary();
-        break;
-      }
-      if (genericUdf == null) {
-        genericUdf = new GenericUDFBridge();
-        ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
-      }
+      GenericUDF genericUdf = getGenericUDFForCast(castType);
       List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
       children.add(child);
       ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
       return desc;
+    } else {
+
+      // Casts to exact types including long to double etc. are needed in some special cases.
+      if (udf instanceof GenericUDFCoalesce) {
+        GenericUDF genericUdf = getGenericUDFForCast(castType);
+        List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+        children.add(child);
+        ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
+        return desc;
+      }
     }
-    // No cast needed
     return null;
   }
 
+  private GenericUDF getGenericUDFForCast(TypeInfo castType) {
+    UDF udfClass = null;
+    GenericUDF genericUdf = null;
+    switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
+    case BYTE:
+      udfClass = new UDFToByte();
+      break;
+    case SHORT:
+      udfClass = new UDFToShort();
+      break;
+    case INT:
+      udfClass = new UDFToInteger();
+      break;
+    case LONG:
+      udfClass = new UDFToLong();
+      break;
+    case FLOAT:
+      udfClass = new UDFToFloat();
+      break;
+    case DOUBLE:
+      udfClass = new UDFToDouble();
+      break;
+    case STRING:
+      udfClass = new UDFToString();
+      break;
+    case BOOLEAN:
+      udfClass = new UDFToBoolean();
+      break;
+    case DATE:
+      genericUdf = new GenericUDFToDate();
+      break;
+    case TIMESTAMP:
+      genericUdf = new GenericUDFToUnixTimeStamp();
+      break;
+    case BINARY:
+      genericUdf = new GenericUDFToBinary();
+      break;
+    }
+    if (genericUdf == null) {
+      genericUdf = new GenericUDFBridge();
+      ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
+    }
+    return genericUdf;
+  }
 
   /* Return true if this is one of a small set of functions for which
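Review note (illustrative, not part of the patch): `getGenericUDFForCast` is the old inline switch from `getImplicitCastExpression`, extracted so the new COALESCE path can reuse it. A self-contained sketch of the expression it effectively produces for the `coalesce(cfloat, cbigint, 0)` test below — the `cbigint` child wrapped in a cast to the common `float` type. The class name is hypothetical, and since the helper is private, its bridge-fallback branch (FLOAT has no GenericUDF cast) is reproduced by hand here:

```java
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.UDFToFloat;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CastSketch {
  public static void main(String[] args) {
    ExprNodeDesc child = new ExprNodeColumnDesc(
        TypeInfoFactory.longTypeInfo, "cbigint", "alltypesorc", false);

    // FLOAT falls into the UDF branch of the switch, so a GenericUDFBridge wraps UDFToFloat,
    // exactly as getGenericUDFForCast does for the primitive categories without a GenericUDF.
    GenericUDFBridge bridge = new GenericUDFBridge();
    bridge.setUdfClassName(UDFToFloat.class.getName());

    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(child);
    ExprNodeDesc cast = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.floatTypeInfo, bridge, children);
    System.out.println(cast.getTypeString()); // float
  }
}
```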
@@ -568,7 +585,10 @@ private ExprNodeDesc foldConstantsForUnaryExpression(ExprNodeDesc exprDesc) thro
     }
 
     GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
-    if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive) {
+    if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive
+        || castExpressionUdfs.contains(gudf)
+        || ((gudf instanceof GenericUDFBridge)
+            && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
       ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
       ObjectInspector output = evaluator.initialize(null);
       Object constant = evaluator.evaluate(null);
@@ -775,6 +795,9 @@ private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnT
   private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
       List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
+
+    List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
+    childExpr = constantFoldedChildren;
     //First handle special cases
     if (udf instanceof GenericUDFBetween) {
       return getBetweenFilterExpression(childExpr, mode);
@@ -782,6 +805,10 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
       return getInExpression(childExpr, mode);
     } else if (udf instanceof GenericUDFOPPositive) {
       return getIdentityExpression(childExpr);
+    } else if (udf instanceof GenericUDFCoalesce) {
+
+      // Coalesce is a special case because it can take a variable number of arguments.
+      return getCoalesceExpression(childExpr, returnType);
     } else if (udf instanceof GenericUDFBridge) {
       VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf,
           childExpr, mode, returnType);
@@ -798,7 +825,6 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
       udfClass = ((GenericUDFBridge) udf).getUdfClass();
     }
 
-    List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
     VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode,
         returnType);
     if (ve == null) {
@@ -808,6 +834,33 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
     return ve;
   }
 
+  private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType)
+      throws HiveException {
+    int[] inputColumns = new int[childExpr.size()];
+    VectorExpression[] vectorChildren = null;
+    try {
+      vectorChildren = getVectorExpressions(childExpr, Mode.PROJECTION);
+
+      int i = 0;
+      for (VectorExpression ve : vectorChildren) {
+        inputColumns[i++] = ve.getOutputColumn();
+      }
+
+      int outColumn = ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
+      VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn);
+      vectorCoalesce.setOutputType(returnType.getTypeName());
+      vectorCoalesce.setChildExpressions(vectorChildren);
+      return vectorCoalesce;
+    } finally {
+      // Free the output columns of the child expressions.
+      if (vectorChildren != null) {
+        for (VectorExpression v : vectorChildren) {
+          ocm.freeOutputColumn(v.getOutputColumn());
+        }
+      }
+    }
+  }
+
   /**
    * Create a filter or boolean-valued expression for column IN ( )
    */
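Before the new class itself, a runnable sketch of how the pieces produced by `getCoalesceExpression` behave at evaluation time, assuming the patch is applied. The class name, column indices, and values are illustrative; the children here are plain column references, so no child expressions need to be wired in:

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce;

public class VectorCoalesceDemo {
  public static void main(String[] args) {
    // Three columns: two inputs plus one scratch column for the result.
    VectorizedRowBatch batch = new VectorizedRowBatch(3);
    LongColumnVector c0 = new LongColumnVector();
    LongColumnVector c1 = new LongColumnVector();
    batch.cols[0] = c0;
    batch.cols[1] = c1;
    batch.cols[2] = new LongColumnVector();
    batch.size = 2;

    c0.noNulls = false;
    c0.isNull[0] = true;   // row 0: first input is NULL
    c0.vector[1] = 10;     // row 1: first input is 10
    c1.vector[0] = 7;      // row 0: second input is 7
    c1.vector[1] = 99;

    VectorCoalesce vc = new VectorCoalesce(new int[] {0, 1}, 2);
    vc.setOutputType("bigint");
    vc.evaluate(batch);

    LongColumnVector out = (LongColumnVector) batch.cols[2];
    System.out.println(out.vector[0]); // 7  (falls through to the second input)
    System.out.println(out.vector[1]); // 10 (first input is non-null)
  }
}
```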
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
new file mode 100644
index 0000000..8ca84b7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.util.Arrays;
+
+/**
+ * This expression returns the value of the first non-null expression
+ * in the given set of input expressions.
+ */
+public class VectorCoalesce extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+  private int[] inputColumns;
+  private int outputColumn;
+
+  public VectorCoalesce(int[] inputColumns, int outputColumn) {
+    this();
+    this.inputColumns = inputColumns;
+    this.outputColumn = outputColumn;
+  }
+
+  public VectorCoalesce() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+    ColumnVector outputVector = batch.cols[outputColumn];
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    outputVector.init();
+
+    outputVector.noNulls = false;
+    outputVector.isRepeating = false;
+    if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        outputVector.isNull[i] = true;
+        for (int k = 0; k < inputColumns.length; k++) {
+          ColumnVector cv = batch.cols[inputColumns[k]];
+          if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
+            outputVector.isNull[i] = false;
+            outputVector.setElement(i, 0, cv);
+            break;
+          } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
+            outputVector.isNull[i] = false;
+            outputVector.setElement(i, i, cv);
+            break;
+          }
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        outputVector.isNull[i] = true;
+        for (int k = 0; k < inputColumns.length; k++) {
+          ColumnVector cv = batch.cols[inputColumns[k]];
+          if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
+            outputVector.isNull[i] = false;
+            outputVector.setElement(i, 0, cv);
+            break;
+          } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
+            outputVector.isNull[i] = false;
+            outputVector.setElement(i, i, cv);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return outputType;
+  }
+
+  public int[] getInputColumns() {
+    return inputColumns;
+  }
+
+  public void setInputColumns(int[] inputColumns) {
+    this.inputColumns = inputColumns;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+    // Descriptor is not defined because it takes a variable number of arguments with different
+    // data types.
+    throw new UnsupportedOperationException("Undefined descriptor");
+  }
+}
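One more review sketch (illustrative, not part of the patch), exercising the repeating-input fast path in `evaluate` above: a repeating all-NULL first argument falls through to a repeating constant second argument, which matches the `coalesce(cfloat, cbigint, 0)` query in the q file below. The class name, indices, and values are made up:

```java
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce;

public class RepeatingCoalesceDemo {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(3);

    DoubleColumnVector nulls = new DoubleColumnVector();
    nulls.isRepeating = true;
    nulls.noNulls = false;
    nulls.isNull[0] = true;        // first argument: NULL for every row

    DoubleColumnVector constant = new DoubleColumnVector();
    constant.isRepeating = true;
    constant.vector[0] = 0.0;      // second argument: the literal 0 for every row

    batch.cols[0] = nulls;
    batch.cols[1] = constant;
    batch.cols[2] = new DoubleColumnVector();
    batch.size = 4;

    new VectorCoalesce(new int[] {0, 1}, 2).evaluate(batch);

    DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
    System.out.println(out.vector[0]); // 0.0, as in the cfloat/cbigint rows of the q.out below
  }
}
```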
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 21fe8ca..e6be03f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -230,6 +230,7 @@ public Vectorizer() {
     supportedGenericUDFs.add(GenericUDFIn.class);
     supportedGenericUDFs.add(GenericUDFCase.class);
     supportedGenericUDFs.add(GenericUDFWhen.class);
+    supportedGenericUDFs.add(GenericUDFCoalesce.class);
 
     // For type casts
     supportedGenericUDFs.add(UDFToLong.class);
diff --git ql/src/test/queries/clientpositive/vector_coalesce.q ql/src/test/queries/clientpositive/vector_coalesce.q
new file mode 100644
index 0000000..052ab71
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_coalesce.q
@@ -0,0 +1,32 @@
+SET hive.vectorized.execution.enabled=true;
+EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10;
+
+SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10;
+
+EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10;
+
+SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10;
+
+EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10;
+
+SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10;
+
+EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10;
+
+SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10;
diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out
new file mode 100644
index 0000000..11fbadd
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_coalesce.q.out
@@ -0,0 +1,256 @@
+PREHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 3143 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cdouble is null (type: boolean)
+              Statistics: Num rows: 1571 Data size: 188558 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cdouble (type: double), cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(cdouble,cstring1,cint,cfloat,csmallint) (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1571 Data size: 188558 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL	LFgU5WT87C2yJ4W4YU0r8Pp	-285355633	-51.0	NULL	LFgU5WT87C2yJ4W4YU0r8Pp
+NULL	75bFXC7TqGo1SEaYAx4C58m	NULL	-51.0	NULL	75bFXC7TqGo1SEaYAx4C58m
+NULL	v3p153e2bSkGS70v04G	354670578	-51.0	NULL	v3p153e2bSkGS70v04G
+NULL	0pOH7A4O8aQ37NuBqn	951003458	-51.0	NULL	0pOH7A4O8aQ37NuBqn
+NULL	8ShAFcD734S8Q26WjMwpq0Q	164554497	-51.0	NULL	8ShAFcD734S8Q26WjMwpq0Q
+NULL	nOF31ehjY7ULCHMf	455419170	-51.0	NULL	nOF31ehjY7ULCHMf
+NULL	t32s57Cjt4a250qQgVNAB5T	-109813638	-51.0	NULL	t32s57Cjt4a250qQgVNAB5T
+NULL	nvO822k30OaH37Il	665801232	-51.0	NULL	nvO822k30OaH37Il
+NULL	M152O	-601502867	-51.0	NULL	M152O
+NULL	FgJ7Hft6845s1766oyt82q	199879534	-51.0	NULL	FgJ7Hft6845s1766oyt82q
+PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ctinyint is null (type: boolean)
+              Statistics: Num rows: 11788 Data size: 188610 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), cdouble (type: double), cint (type: int), COALESCE((ctinyint + 10),(cdouble + log2(cint)),0) (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 11788 Data size: 188610 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL	-4213.0	528534767	-4184.022576865738
+NULL	-3012.0	528534767	-2983.0225768657383
+NULL	-4016.0	528534767	-3987.0225768657383
+NULL	-11534.0	528534767	-11505.022576865738
+NULL	-6147.0	528534767	-6118.022576865738
+NULL	-7680.0	528534767	-7651.022576865738
+NULL	-7314.0	528534767	-7285.022576865738
+NULL	11254.0	528534767	11282.977423134262
+NULL	13889.0	528534767	13917.977423134262
+NULL	3321.0	528534767	3349.9774231342617
+PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 31436 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cfloat is null and cbigint is null) (type: boolean)
+              Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cfloat (type: float), cbigint (type: bigint), COALESCE(cfloat,cbigint,0) (type: float)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 7859 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+NULL	NULL	0.0
+PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 4715 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean)
+              Statistics: Num rows: 4715 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 4715 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:46.674	1969-12-31 16:00:08.875	1969-12-31 15:59:46.674
+NULL	1969-12-31 16:00:13.589	1969-12-31 16:00:13.589
+1969-12-31 15:59:55.787	1969-12-31 16:00:01.546	1969-12-31 15:59:55.787
+1969-12-31 15:59:44.187	1969-12-31 16:00:06.961	1969-12-31 15:59:44.187
+1969-12-31 15:59:50.434	1969-12-31 16:00:13.352	1969-12-31 15:59:50.434
+1969-12-31 16:00:15.007	1969-12-31 16:00:15.148	1969-12-31 16:00:15.007
+1969-12-31 16:00:07.021	1969-12-31 16:00:02.997	1969-12-31 16:00:07.021
+1969-12-31 16:00:04.963	1969-12-31 15:59:56.474	1969-12-31 16:00:04.963
+1969-12-31 15:59:52.176	1969-12-31 16:00:07.787	1969-12-31 15:59:52.176
+1969-12-31 15:59:44.569	1969-12-31 15:59:51.665	1969-12-31 15:59:44.569