From 3baeaabe4f9552f46fdb940d002dae7d7fe8047d Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Mon, 18 May 2015 16:52:55 -0700 Subject: [PATCH] HIVE-10745 : Better null handling by Vectorizer --- .../hive/ql/exec/ExprNodeEvaluatorFactory.java | 2 - .../hive/ql/exec/vector/VectorizationContext.java | 46 +++++++++++++--------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java index f08321c..5a532c4 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java @@ -27,8 +27,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.NullWritable; /** * ExprNodeEvaluatorFactory. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 48f34a9..f49b562 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.lang.reflect.Constructor; -import java.math.BigDecimal; import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; @@ -37,7 +36,6 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -129,8 +127,8 @@ private static final Log LOG = LogFactory.getLog( VectorizationContext.class.getName()); - private String contextName; - private int level; + private final String contextName; + private final int level; VectorExpressionDescriptor vMap; @@ -359,7 +357,7 @@ void freeOutputColumn(int index) { } public int[] currentScratchColumns() { - TreeSet treeSet = new TreeSet(); + TreeSet treeSet = new TreeSet(); for (Integer col : usedOutputColumns) { treeSet.add(initialOutputCol + col); } @@ -440,8 +438,6 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); } - } else if (exprDesc instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)exprDesc).getValue()) { - ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode); } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); @@ -691,6 +687,10 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException case DECIMAL: genericUdf = new GenericUDFToDecimal(); break; + case VOID: + case UNKNOWN: + // fall-through to throw exception, its not expected for execution to reach here. + break; } if (genericUdf == null) { if (udfClass == null) { @@ -1345,8 +1345,11 @@ private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge ud } private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { + + if (null == scalar) { + return null; + } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - int scale = HiveDecimalUtils.getScaleForType(ptinfo); String typename = type.getTypeName(); HiveDecimal rawDecimal; switch (ptinfo.getPrimitiveCategory()) { @@ -1384,6 +1387,9 @@ private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws H } private String castConstantToString(Object scalar, TypeInfo type) throws HiveException { + if (null == scalar) { + return null; + } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); switch (ptinfo.getPrimitiveCategory()) { @@ -1403,6 +1409,9 @@ private String castConstantToString(Object scalar, TypeInfo type) throws HiveExc } private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException { + if (null == scalar) { + return null; + } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); switch (ptinfo.getPrimitiveCategory()) { @@ -1422,6 +1431,9 @@ private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveExc } private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException { + if (null == scalar) { + return null; + } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); switch (ptinfo.getPrimitiveCategory()) { @@ -1449,8 +1461,6 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo Object constantValue = ((ExprNodeConstantDesc) child).getValue(); HiveDecimal decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo()); return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION); - } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) { - return getConstantVectorExpression(null, returnType, Mode.PROJECTION); } if (isIntFamily(inputType)) { return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType); @@ -1476,8 +1486,6 @@ private VectorExpression getCastToString(List childExpr, TypeInfo Object constantValue = ((ExprNodeConstantDesc) child).getValue(); String strValue = castConstantToString(constantValue, child.getTypeInfo()); return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION); - } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) { - return getConstantVectorExpression(null, returnType, Mode.PROJECTION); } if (inputType.equals("boolean")) { // Boolean must come before the integer family. It's a special case. @@ -1563,8 +1571,6 @@ private VectorExpression getCastToDoubleExpression(Class udf, List childExpr) ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { + if (null == ((ExprNodeConstantDesc)child).getValue()) { + return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); + } // Don't do constant folding here. Wait until the optimizer is changed to do it. // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. return null; - } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) { - return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); } // Long and double are handled using descriptors, string needs to be specially handled. if (isStringFamily(inputType)) { @@ -1619,8 +1626,6 @@ private VectorExpression getCastToLongExpression(List childExpr) Object constantValue = ((ExprNodeConstantDesc) child).getValue(); Long longValue = castConstantToLong(constantValue, child.getTypeInfo()); return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); - } else if (child instanceof ExprNodeConstantDesc && null == ((ExprNodeConstantDesc)child).getValue()) { - return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION); } // Float family, timestamp are handled via descriptor based lookup, int family needs // special handling. @@ -1770,7 +1775,10 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) variableArgPositions.add(i); argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); } else if (child instanceof ExprNodeConstantDesc) { - + if (((ExprNodeConstantDesc) child).getValue() == null) { + // cannot handle constant null at the moment + throw new HiveException("Unable to vectorize Custom UDF"); + } // this is a constant argDescs[i].setConstant((ExprNodeConstantDesc) child); } else { -- 1.7.12.4 (Apple Git-37)