diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index be5a747..fdeff70 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -603,6 +603,7 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_number_compare_projection.q,\ vector_partitioned_date_time.q,\ vector_udf1.q,\ + vector_when_coalesce.q,\ vectorization_short_regress.q,\ vectorized_dynamic_partition_pruning.q,\ vectorized_ptf.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index f6b6447..633a6e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -397,6 +397,7 @@ int allocateOutputColumn(TypeInfo typeInfo) throws HiveException { // which could lead to a lot of extra unnecessary scratch columns. String vectorTypeName = getScratchName(typeInfo); int relativeCol = allocateOutputColumnInternal(vectorTypeName); + // System.out.println("VECTORIZATION_CONTEXT: allocate scratch column " + (initialOutputCol + relativeCol)); return initialOutputCol + relativeCol; } @@ -435,6 +436,7 @@ void freeOutputColumn(int index) { } int colIndex = index-initialOutputCol; if (colIndex >= 0) { + // System.out.println("VECTORIZATION_CONTEXT: freeing scratch column " + (initialOutputCol + colIndex)); usedOutputColumns.remove(index-initialOutputCol); } } @@ -457,7 +459,8 @@ public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { } private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc - exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException { + exprDesc, VectorExpressionDescriptor.Mode mode, List deferredScratchColDeallocList) + throws HiveException { int columnNum = getInputColumnIndex(exprDesc.getColumn()); VectorExpression expr = null; switch (mode) { @@ -473,12 +476,13 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprAsList.add(exprDesc); // First try our cast method that will handle a few special cases. - VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); + VectorExpression castToBooleanExpr = + getCastToBoolean(exprAsList, deferredScratchColDeallocList); if (castToBooleanExpr == null) { // Ok, try the UDF. castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, - VectorExpressionDescriptor.Mode.PROJECTION, null); + VectorExpressionDescriptor.Mode.PROJECTION, null, deferredScratchColDeallocList); if (castToBooleanExpr == null) { throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean"); @@ -526,9 +530,23 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveEx * @throws HiveException */ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException { + List deferredScratchColDeallocList = new ArrayList(); + VectorExpression ve = + getInternalVectorExpression(exprDesc, mode, deferredScratchColDeallocList); + if (deferredScratchColDeallocList.size() > 0) { + for (int scratchColNum : deferredScratchColDeallocList) { + ocm.freeOutputColumn(scratchColNum); + } + } + return ve; + } + + private VectorExpression getInternalVectorExpression(ExprNodeDesc exprDesc, + VectorExpressionDescriptor.Mode mode, List deferredScratchColDeallocList) + throws HiveException { VectorExpression ve = null; if (exprDesc instanceof ExprNodeColumnDesc) { - ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); + ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode, deferredScratchColDeallocList); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; // Add cast expression if needed. Child expressions of a udf may return different data types @@ -539,7 +557,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress List childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(), exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), - childExpressions, mode, exprDesc.getTypeInfo()); + childExpressions, mode, exprDesc.getTypeInfo(), deferredScratchColDeallocList); if (ve == null) { // Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor // when configured. @@ -556,7 +574,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress + " because hive.vectorized.adaptor.usage.mode=none"); case CHOSEN: if (isNonVectorizedPathUDF(expr, mode)) { - ve = getCustomUDFExpression(expr, mode); + ve = getCustomUDFExpression(expr, mode, deferredScratchColDeallocList); } else { throw new HiveException( "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() @@ -569,7 +587,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString() + " because hive.vectorized.adaptor.usage.mode=all"); } - ve = getCustomUDFExpression(expr, mode); + ve = getCustomUDFExpression(expr, mode, deferredScratchColDeallocList); break; default: throw new RuntimeException("Unknown hive vector adaptor usage mode " + @@ -1045,6 +1063,8 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI int outCol = -1; if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { outCol = ocm.allocateOutputColumn(typeInfo); + // System.out.println("VECTORIZATION_CONTEXT: (getConstantVectorExpression PROJECTION) allocated scratch column " + outCol + + // " value " + (constantValue == null ? "NULL" : constantValue)); } if (constantValue == null) { return new ConstantVectorExpression(outCol, typeName, true); @@ -1099,14 +1119,15 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI * and casting boolean to long. IdentityExpression and its children are always * projections. */ - private VectorExpression getIdentityExpression(List childExprList) - throws HiveException { + private VectorExpression getIdentityExpression(List childExprList, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc childExpr = childExprList.get(0); int inputCol; String colType; VectorExpression v1 = null; if (childExpr instanceof ExprNodeGenericFuncDesc) { - v1 = getVectorExpression(childExpr); + v1 = getInternalVectorExpression(childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + deferredScratchColDeallocList); inputCol = v1.getOutputColumn(); colType = v1.getOutputType(); } else if (childExpr instanceof ExprNodeColumnDesc) { @@ -1125,7 +1146,7 @@ private VectorExpression getIdentityExpression(List childExprList) private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, Class udfClass, List childExpr, VectorExpressionDescriptor.Mode mode, - TypeInfo returnType) throws HiveException { + TypeInfo returnType, List deferredScratchColDeallocList) throws HiveException { int numChildren = (childExpr == null) ? 0 : childExpr.size(); @@ -1158,7 +1179,7 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, throw new RuntimeException("Unexpected multi-child UDF"); } VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); + return createVectorExpression(vclass, childExpr, childrenMode, returnType, deferredScratchColDeallocList); } if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) { return null; @@ -1194,11 +1215,12 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf, return null; } VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); + return createVectorExpression(vclass, childExpr, childrenMode, returnType, deferredScratchColDeallocList); } private VectorExpression createVectorExpression(Class vectorClass, - List childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException { + List childExpr, VectorExpressionDescriptor.Mode childrenMode, + TypeInfo returnType, List deferredScratchColDeallocList) throws HiveException { int numChildren = childExpr == null ? 0: childExpr.size(); VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren]; List children = new ArrayList(); @@ -1212,7 +1234,7 @@ private VectorExpression createVectorExpression(Class vectorClass, throw new HiveException("No vector type for " + vectorClass.getSimpleName() + " argument #" + i + " type name " + undecoratedName); } if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); + VectorExpression vChild = getInternalVectorExpression(child, childrenMode, deferredScratchColDeallocList); children.add(vChild); arguments[i] = vChild.getOutputColumn(); } else if (child instanceof ExprNodeColumnDesc) { @@ -1234,13 +1256,18 @@ private VectorExpression createVectorExpression(Class vectorClass, if ((vectorExpression != null) && !children.isEmpty()) { vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); } + List newDeferredScratchColDeallocList = vectorExpression.getDeferredScratchColDeallocList(); + if (newDeferredScratchColDeallocList != null && newDeferredScratchColDeallocList.size() > 0) { + deferredScratchColDeallocList.addAll(newDeferredScratchColDeallocList); + } else { + for (VectorExpression child : children) { + // System.out.println("VECTORIZATION_CONTEXT: going to call freeOutputColumn for vector expression " + child.toString()); + ocm.freeOutputColumn(child.getOutputColumn()); + } + } return vectorExpression; } catch (Exception ex) { throw new HiveException(ex); - } finally { - for (VectorExpression ve : children) { - ocm.freeOutputColumn(ve.getOutputColumn()); - } } } @@ -1306,6 +1333,7 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve = (VectorExpression) ctor.newInstance(newArgs); ve.setOutputType(returnTypeName); + // System.out.println("VECTORIZATION_CONTEXT: (instantiateExpression) allocated scratch column " + outputCol + " for vector expression " + ve.toString()); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + StringUtils.stringifyException(ex)); @@ -1323,7 +1351,8 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT } private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, - List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { List castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; @@ -1332,11 +1361,11 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, // it returns null. VectorExpression ve = null; if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) { - ve = getBetweenFilterExpression(childExpr, mode, returnType); + ve = getBetweenFilterExpression(childExpr, mode, returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFIn) { - ve = getInExpression(childExpr, mode, returnType); + ve = getInExpression(childExpr, mode, returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFOPPositive) { - ve = getIdentityExpression(childExpr); + ve = getIdentityExpression(childExpr, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { // Coalesce is a special case because it can take variable number of arguments. @@ -1348,15 +1377,16 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, ve = getEltExpression(childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, - returnType); + returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFToDecimal) { - ve = getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFToChar) { - ve = getCastToChar(childExpr, returnType); + ve = getCastToChar(childExpr, returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFToVarchar) { - ve = getCastToVarChar(childExpr, returnType); + ve = getCastToVarChar(childExpr, returnType, deferredScratchColDeallocList); } else if (udf instanceof GenericUDFTimestamp) { - ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType, + deferredScratchColDeallocList); } if (ve != null) { return ve; @@ -1370,19 +1400,22 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, } ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), - udfClass, castedChildren, mode, returnType); + udfClass, castedChildren, mode, returnType, deferredScratchColDeallocList); return ve; } private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf, - List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType); + List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { + VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, + returnType, deferredScratchColDeallocList); // Replace with the milliseconds conversion if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestamp) { ve = createVectorExpression(CastMillisecondsLongToTimestamp.class, - childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType, + deferredScratchColDeallocList); } return ve; @@ -1391,55 +1424,60 @@ private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf, private VectorExpression getCoalesceExpression(List childExpr, TypeInfo returnType) throws HiveException { int[] inputColumns = new int[childExpr.size()]; - VectorExpression[] vectorChildren = null; - try { - vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); + VectorExpression[] vectorChildren = + getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); - int i = 0; - for (VectorExpression ve : vectorChildren) { - inputColumns[i++] = ve.getOutputColumn(); - } + int i = 0; + for (VectorExpression ve : vectorChildren) { + inputColumns[i++] = ve.getOutputColumn(); + } - int outColumn = ocm.allocateOutputColumn(returnType); - VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn); - vectorCoalesce.setOutputType(returnType.getTypeName()); - vectorCoalesce.setChildExpressions(vectorChildren); - return vectorCoalesce; - } finally { - // Free the output columns of the child expressions. - if (vectorChildren != null) { - for (VectorExpression v : vectorChildren) { - ocm.freeOutputColumn(v.getOutputColumn()); - } + int outColumn = ocm.allocateOutputColumn(returnType); + VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn); + vectorCoalesce.setOutputType(returnType.getTypeName()); + vectorCoalesce.setChildExpressions(vectorChildren); + // System.out.println("VECTORIZATION_CONTEXT: (getCoalesceExpression) allocated output scratch column " + outColumn + " for vector expression " + vectorCoalesce.toString()); + + if (vectorChildren != null) { + List deferredScratchColDeallocList = new ArrayList(); + for (VectorExpression v : vectorChildren) { + deferredScratchColDeallocList.add(v.getOutputColumn()); } + vectorCoalesce.setDeferredScratchColDeallocList(deferredScratchColDeallocList); + // System.out.println("VECTORIZATION_CONTEXT: (getCoalesceExpression) deferred scratch column list " + deferredScratchColDeallocList.toString()); } + + return vectorCoalesce; } private VectorExpression getEltExpression(List childExpr, TypeInfo returnType) throws HiveException { int[] inputColumns = new int[childExpr.size()]; - VectorExpression[] vectorChildren = null; - try { - vectorChildren = getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); + VectorExpression[] vectorChildren = + getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); - int i = 0; - for (VectorExpression ve : vectorChildren) { - inputColumns[i++] = ve.getOutputColumn(); - } + int i = 0; + for (VectorExpression ve : vectorChildren) { + inputColumns[i++] = ve.getOutputColumn(); + } - int outColumn = ocm.allocateOutputColumn(returnType); - VectorElt vectorElt = new VectorElt(inputColumns, outColumn); - vectorElt.setOutputType(returnType.getTypeName()); - vectorElt.setChildExpressions(vectorChildren); - return vectorElt; - } finally { - // Free the output columns of the child expressions. - if (vectorChildren != null) { - for (VectorExpression v : vectorChildren) { - ocm.freeOutputColumn(v.getOutputColumn()); - } + + int outColumn = ocm.allocateOutputColumn(returnType); + VectorElt vectorElt = new VectorElt(inputColumns, outColumn); + vectorElt.setOutputType(returnType.getTypeName()); + vectorElt.setChildExpressions(vectorChildren); + + if (vectorChildren != null) { + List deferredScratchColDeallocList = new ArrayList(); + for (VectorExpression v : vectorChildren) { + deferredScratchColDeallocList.add(v.getOutputColumn()); } + vectorElt.setDeferredScratchColDeallocList(deferredScratchColDeallocList); + // System.out.println("VECTORIZATION_CONTEXT: (getEltExpression) deferred scratch column list " + deferredScratchColDeallocList.toString()); } + + return vectorElt; + } public enum InConstantType { @@ -1490,8 +1528,8 @@ public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCat } private VectorExpression getStructInExpression(List childExpr, ExprNodeDesc colExpr, - TypeInfo colTypeInfo, List inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) - throws HiveException { + TypeInfo colTypeInfo, List inChildren, VectorExpressionDescriptor.Mode mode, + TypeInfo returnType, List deferredScratchColDeallocList) throws HiveException { VectorExpression expr = null; @@ -1613,7 +1651,8 @@ private VectorExpression getStructInExpression(List childExpr, Exp Class cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class); - expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, + deferredScratchColDeallocList); ((IStringInExpr) expr).setInListValues(serializedInChildren); @@ -1628,7 +1667,8 @@ private VectorExpression getStructInExpression(List childExpr, Exp * Create a filter or boolean-valued expression for column IN ( ) */ private VectorExpression getInExpression(List childExpr, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + VectorExpressionDescriptor.Mode mode, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc colExpr = childExpr.get(0); List inChildren = childExpr.subList(1, childExpr.size()); @@ -1637,7 +1677,8 @@ private VectorExpression getInExpression(List childExpr, TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType); Category category = colTypeInfo.getCategory(); if (category == Category.STRUCT) { - return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType); + return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType, + deferredScratchColDeallocList); } else if (category != Category.PRIMITIVE) { return null; } @@ -1672,7 +1713,8 @@ private VectorExpression getInExpression(List childExpr, for (int i = 0; i != inVals.length; i++) { inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((ILongInExpr) expr).setInListValues(inVals); } else if (isTimestampFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class); @@ -1680,7 +1722,8 @@ private VectorExpression getInExpression(List childExpr, for (int i = 0; i != inVals.length; i++) { inVals[i] = getTimestampScalar(childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((ITimestampInExpr) expr).setInListValues(inVals); } else if (isStringFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class); @@ -1688,7 +1731,8 @@ private VectorExpression getInExpression(List childExpr, for (int i = 0; i != inVals.length; i++) { inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((IStringInExpr) expr).setInListValues(inVals); } else if (isFloatFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class); @@ -1696,7 +1740,8 @@ private VectorExpression getInExpression(List childExpr, for (int i = 0; i != inValsD.length; i++) { inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((IDoubleInExpr) expr).setInListValues(inValsD); } else if (isDecimalFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class); @@ -1705,7 +1750,8 @@ private VectorExpression getInExpression(List childExpr, inValsD[i] = (HiveDecimal) getVectorTypeScalarValue( (ExprNodeConstantDesc) childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((IDecimalInExpr) expr).setInListValues(inValsD); } else if (isDateFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); @@ -1713,7 +1759,8 @@ private VectorExpression getInExpression(List childExpr, for (int i = 0; i != inVals.length; i++) { inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i)); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + expr = createVectorExpression(cl, childExpr.subList(0, 1), + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); ((ILongInExpr) expr).setInListValues(inVals); } @@ -1750,22 +1797,24 @@ private PrimitiveCategory getAnyIntegerPrimitiveCategoryFromUdfClass(Class childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { Class cl = udf.getUdfClass(); VectorExpression ve = null; if (isCastToIntFamily(cl)) { PrimitiveCategory integerPrimitiveCategory = getAnyIntegerPrimitiveCategoryFromUdfClass(cl); - ve = getCastToLongExpression(childExpr, integerPrimitiveCategory); + ve = getCastToLongExpression(childExpr, integerPrimitiveCategory, deferredScratchColDeallocList); } else if (cl.equals(UDFToBoolean.class)) { - ve = getCastToBoolean(childExpr); + ve = getCastToBoolean(childExpr, deferredScratchColDeallocList); } else if (isCastToFloatFamily(cl)) { - ve = getCastToDoubleExpression(cl, childExpr, returnType); + ve = getCastToDoubleExpression(cl, childExpr, returnType, deferredScratchColDeallocList); } else if (cl.equals(UDFToString.class)) { - ve = getCastToString(childExpr, returnType); + ve = getCastToString(childExpr, returnType, deferredScratchColDeallocList); } if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { - ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode); + ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode, + deferredScratchColDeallocList); } return ve; } @@ -1908,8 +1957,8 @@ private Long castConstantToLong(Object scalar, TypeInfo type, } } - private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType) - throws HiveException { + private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -1919,22 +1968,26 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo return getConstantVectorExpression(decimalValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); } if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastLongToDecimal.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isFloatFamily(inputType)) { - return createVectorExpression(CastDoubleToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDoubleToDecimal.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (decimalTypePattern.matcher(inputType).matches()) { - return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, - returnType); + return createVectorExpression(CastDecimalToDecimal.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastStringToDecimal.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (inputType.equals("timestamp")) { - return createVectorExpression(CastTimestampToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastTimestampToDecimal.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } return null; } - private VectorExpression getCastToString(List childExpr, TypeInfo returnType) - throws HiveException { + private VectorExpression getCastToString(List childExpr, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -1945,21 +1998,26 @@ private VectorExpression getCastToString(List childExpr, TypeInfo } if (inputType.equals("boolean")) { // Boolean must come before the integer family. It's a special case. - return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, null); + return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, null, deferredScratchColDeallocList); } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastLongToString.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDecimalToString.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDateToString.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastStringGroupToString.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } return null; } - private VectorExpression getCastToChar(List childExpr, TypeInfo returnType) - throws HiveException { + private VectorExpression getCastToChar(List childExpr, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -1969,21 +2027,26 @@ private VectorExpression getCastToChar(List childExpr, TypeInfo re } if (inputType.equals("boolean")) { // Boolean must come before the integer family. It's a special case. - return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastLongToChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDecimalToChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDateToChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastStringGroupToChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } return null; } - private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType) - throws HiveException { + private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -1993,21 +2056,26 @@ private VectorExpression getCastToVarChar(List childExpr, TypeInfo } if (inputType.equals("boolean")) { // Boolean must come before the integer family. It's a special case. - return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastLongToVarChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDecimalToVarChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastDateToVarChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(CastStringGroupToVarChar.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, returnType, deferredScratchColDeallocList); } return null; } private VectorExpression getCastToDoubleExpression(Class udf, List childExpr, - TypeInfo returnType) throws HiveException { + TypeInfo returnType, List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -2019,23 +2087,25 @@ private VectorExpression getCastToDoubleExpression(Class udf, List childExpr) - throws HiveException { + private VectorExpression getCastToBoolean(List childExpr, + List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -2050,7 +2120,7 @@ private VectorExpression getCastToBoolean(List childExpr) if (isStringFamily(inputType)) { // string casts to false if it is 0 characters long, otherwise true VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, - VectorExpressionDescriptor.Mode.PROJECTION, null); + VectorExpressionDescriptor.Mode.PROJECTION, null, deferredScratchColDeallocList); int outputCol = ocm.allocateOutputColumn(TypeInfoFactory.longTypeInfo); VectorExpression lenToBoolExpr = @@ -2062,8 +2132,8 @@ private VectorExpression getCastToBoolean(List childExpr) return null; } - private VectorExpression getCastToLongExpression(List childExpr, PrimitiveCategory integerPrimitiveCategory) - throws HiveException { + private VectorExpression getCastToLongExpression(List childExpr, + PrimitiveCategory integerPrimitiveCategory, List deferredScratchColDeallocList) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { @@ -2076,7 +2146,7 @@ private VectorExpression getCastToLongExpression(List childExpr, P // special handling. if (isIntFamily(inputType)) { // integer and boolean types require no conversion, so use a no-op - return getIdentityExpression(childExpr); + return getIdentityExpression(childExpr, deferredScratchColDeallocList); } return null; } @@ -2087,8 +2157,9 @@ private VectorExpression getCastToLongExpression(List childExpr, P * needs to be done differently than the standard way where all arguments are * passed to the VectorExpression constructor. */ - private VectorExpression getBetweenFilterExpression(List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) - throws HiveException { + private VectorExpression getBetweenFilterExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType, + List deferredScratchColDeallocList) throws HiveException { if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { @@ -2175,14 +2246,16 @@ private VectorExpression getBetweenFilterExpression(List childExpr } else if (isDateFamily(colType) && notKeywordPresent) { cl = FilterLongColumnNotBetween.class; } - return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, + returnType, deferredScratchColDeallocList); } /* * Return vector expression for a custom (i.e. not built-in) UDF. */ - private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) - throws HiveException { + private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, + VectorExpressionDescriptor.Mode mode, List deferredScratchColDeallocList) + throws HiveException { boolean isFilter = false; // Assume. if (mode == VectorExpressionDescriptor.Mode.FILTER) { @@ -2217,23 +2290,26 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve for (int i = 0; i < childExprList.size(); i++) { ExprNodeDesc child = childExprList.get(i); - /* - UNDONE: Until we fix scratch column allocation to not release after each expression, we - UNDONE: cannot have another other than a column or constant in the parameter list. if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) child " + child.toString()); + // List children = child.getChildren(); + // for (ExprNodeDesc subchild : children) { + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) children " + subchild.getClass().getName() + " " + subchild.toString()); + // } + VectorExpression e = getInternalVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION, + deferredScratchColDeallocList); vectorExprs.add(e); variableArgPositions.add(i); exprResultColumnNums.add(e.getOutputColumn()); argDescs[i].setVariable(e.getOutputColumn()); - } else - */ - if (child instanceof ExprNodeColumnDesc) { + } else if (child instanceof ExprNodeColumnDesc) { variableArgPositions.add(i); argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) column child " + child.getClass().getName() + " " + getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); } else if (child instanceof ExprNodeConstantDesc) { // this is a constant (or null) argDescs[i].setConstant((ExprNodeConstantDesc) child); + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) constant child " + child.getClass().getName() + " " + child.toString()); } else { throw new HiveException("Unable to use the VectorUDFAdaptor. Encountered unsupported expr desc : " + child); @@ -2248,6 +2324,7 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve // Make vectorized operator VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultTypeName, argDescs); + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) allocated scratch column " + outputCol + " for vector expression " + ve.toString()); // Set child expressions VectorExpression[] childVEs = null; @@ -2259,10 +2336,9 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve } ve.setChildExpressions(childVEs); - // Free output columns if inputs have non-leaf expression trees. - for (Integer i : exprResultColumnNums) { - ocm.freeOutputColumn(i); - } + // Free output columns later. + // System.out.println("VECTORIZATION_CONTEXT: (getCustomUDFExpression) set deferred scratch column list " + exprResultColumnNums.toString()); + deferredScratchColDeallocList.addAll(exprResultColumnNums); if (isFilter) { SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputCol); @@ -2764,4 +2840,27 @@ public int compare(Integer o1, Integer o2) { return sb.toString(); } + + static int STACK_LENGTH_LIMIT = 20; + public static String getStackTraceAsSingleLine(StackTraceElement[] stackTrace) { + StringBuilder sb = new StringBuilder(); + sb.append("Stack trace: "); + int length = stackTrace.length; + boolean isTruncated = false; + if (length > STACK_LENGTH_LIMIT) { + length = STACK_LENGTH_LIMIT; + isTruncated = true; + } + for (int i = 0; i < length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(stackTrace[i]); + } + if (isTruncated) { + sb.append(", ..."); + } + + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 8fca8a1..36a4a4a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -19,9 +19,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.Serializable; +import java.util.List; import java.util.Map; import com.google.common.collect.ImmutableMap; + import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -71,6 +73,11 @@ public static Type getValue(String name) { protected String outputType; /** + * Optional list of scratch columns to deallocate at the root vector expression. + */ + private transient List deferredScratchColDeallocList = null; + + /** * This is the primary method to implement expression logic. * @param batch */ @@ -138,6 +145,14 @@ public void setInputTypes(Type ... inputTypes) { return inputTypes; } + public void setDeferredScratchColDeallocList(List deferredScratchColDeallocList) { + this.deferredScratchColDeallocList = deferredScratchColDeallocList; + } + + public List getDeferredScratchColDeallocList() { + return deferredScratchColDeallocList; + } + @Override public String toString() { StringBuilder b = new StringBuilder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2a99274..182f32d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1883,6 +1883,7 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode } catch (Exception e) { if (e instanceof HiveException) { LOG.info(e.getMessage()); + LOG.info(getStackTraceAsSingleLine(e.getStackTrace())); } else { if (LOG.isDebugEnabled()) { // Show stack trace. @@ -2544,4 +2545,27 @@ public void debugDisplayAllMaps(BaseWork work) { LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount); LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames)); } + + static int STACK_LENGTH_LIMIT = 20; + public static String getStackTraceAsSingleLine(StackTraceElement[] stackTrace) { + StringBuilder sb = new StringBuilder(); + sb.append("Stack trace: "); + int length = stackTrace.length; + boolean isTruncated = false; + if (length > STACK_LENGTH_LIMIT) { + length = STACK_LENGTH_LIMIT; + isTruncated = true; + } + for (int i = 0; i < length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(stackTrace[i]); + } + if (isTruncated) { + sb.append(", ..."); + } + + return sb.toString(); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index bb37a04..db42ed7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; @@ -121,6 +122,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLTrim; @@ -142,6 +144,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -424,6 +427,68 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep } @Test + public void testWhenCoalesce() throws HiveException { + + // GenericUDFWhen( + // GenericUDFOPGreaterThan( + // GenericUDFCoalesce( + // Column[_col2], Const int 5), + // Const bigint 1), + // Column[_col2], + // Const void null) + // + + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "member", "table", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "attr", "table", false); + + ExprNodeConstantDesc constLong1Expr = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, new Long(1L)); + ExprNodeConstantDesc constLong5Expr = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, new Long(5L)); + ExprNodeConstantDesc constNullExpr = new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); + + GenericUDFCoalesce coalesceUdf = new GenericUDFCoalesce(); + ExprNodeGenericFuncDesc coalesceExprDesc = new ExprNodeGenericFuncDesc(); + coalesceExprDesc.setTypeInfo(TypeInfoFactory.longTypeInfo); + coalesceExprDesc.setGenericUDF(coalesceUdf); + ArrayList coalesceChildren = new ArrayList(2); + coalesceChildren.add(col2Expr); + coalesceChildren.add(constLong5Expr); + coalesceExprDesc.setChildren(coalesceChildren); + + GenericUDFOPGreaterThan greaterThanUdf = new GenericUDFOPGreaterThan(); + ExprNodeGenericFuncDesc greaterThanExprDesc = new ExprNodeGenericFuncDesc(); + greaterThanExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo); + greaterThanExprDesc.setGenericUDF(greaterThanUdf); + ArrayList greaterThanChildren = new ArrayList(2); + greaterThanChildren.add(coalesceExprDesc); + greaterThanChildren.add(constLong1Expr); + greaterThanExprDesc.setChildren(greaterThanChildren); + + GenericUDFWhen whenUdf = new GenericUDFWhen(); + ExprNodeGenericFuncDesc whenExprDesc = new ExprNodeGenericFuncDesc(); + whenExprDesc.setTypeInfo(TypeInfoFactory.longTypeInfo); + whenExprDesc.setGenericUDF(whenUdf); + ArrayList whenChildren = new ArrayList(3); + whenChildren.add(greaterThanExprDesc); + whenChildren.add(col2Expr); + whenChildren.add(constNullExpr); + whenExprDesc.setChildren(whenChildren); + + // System.out.println("TEST_WHEN_COALESCE whenExprDesc " + whenExprDesc.toString()); + + List columns = new ArrayList(); + columns.add("member"); + columns.add("attr"); + + HiveConf hiveConf = new HiveConf(); + hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE, "all"); + + VectorizationContext vc = new VectorizationContext("name", columns, hiveConf); + + VectorExpression ve = vc.getVectorExpression(whenExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + // System.out.println("TEST_WHEN_COALESCE ve " + ve.toString()); + } + + @Test public void testFloatInExpressions() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Float.class, "col1", "table", false); ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10)); diff --git ql/src/test/queries/clientpositive/vector_when_coalesce.q ql/src/test/queries/clientpositive/vector_when_coalesce.q new file mode 100644 index 0000000..6b51347 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_when_coalesce.q @@ -0,0 +1,19 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +SET hive.auto.convert.join=true; + +DROP TABLE IF EXISTS test_1; CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC; + +DROP TABLE IF EXISTS test_2; CREATE TABLE test_2 (member BIGINT) STORED AS ORC; + +INSERT INTO test_1 VALUES (3,1),(2,2); +INSERT INTO test_2 VALUES (1),(2),(3),(4); + +EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member; +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_when_coalesce.q.out ql/src/test/results/clientpositive/llap/vector_when_coalesce.q.out new file mode 100644 index 0000000..d29b8bc --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_when_coalesce.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: DROP TABLE IF EXISTS test_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: DROP TABLE IF EXISTS test_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_2 (member BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (member BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: INSERT INTO test_1 VALUES (3,1),(2,2) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_1 +POSTHOOK: query: INSERT INTO test_1 VALUES (3,1),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: INSERT INTO test_2 VALUES (1),(2),(3),(4) +PREHOOK: type: QUERY +PREHOOK: Output: default@test_2 +POSTHOOK: query: INSERT INTO test_2 VALUES (1),(2),(3),(4) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 +PREHOOK: query: EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: m + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: member (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: member (type: bigint), attr (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +m.member attr +1 NULL +2 2 +3 NULL +4 NULL