diff --git common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java
index 09af28a..9b0602d 100644
--- common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java
+++ common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java
@@ -395,10 +395,10 @@ private static int divideMultiPrecision(int[] inOut, int divisor) {
 
   private static int arrayValidLength(int[] array) {
     int len = array.length;
-    while (len >= 0 && array[len - 1] == 0) {
+    while (len > 0 && array[len - 1] == 0) {
       --len;
     }
-    return len < 0 ? 0 : len;
+    return len <= 0 ? 0 : len;
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index 4de9f9f..bfdd3ce 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -199,7 +199,7 @@ public String toString() {
     b.append(mode);
     b.append(", Argument Types = {");
     for (int i = 0; i < argCount; i++) {
-      if (i == 0) {
+      if (i != 0) {
        b.append(",");
       }
       b.append(argTypes[i]);
@@ -208,7 +208,7 @@ public String toString() {
 
     b.append(", Input Expression Types = {");
     for (int i = 0; i < argCount; i++) {
-      if (i == 0) {
+      if (i != 0) {
        b.append(",");
       }
       b.append(exprTypes[i]);
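[Editor's aside] Both changes above are one-character logic fixes. A standalone sketch (hypothetical class, not part of the patch) shows the two failure modes: with the old `len >= 0` bound, an all-zero array drives `len` to 0 and the loop condition then reads `array[-1]`; with the old `i == 0` test, the comma landed before the first element instead of between elements.

    // Standalone sketch (not part of the patch) illustrating both fixes above.
    public class PatchIllustration {

      // Old bound was "len >= 0": for an all-zero array, len reaches 0 and the
      // condition still evaluates array[len - 1], i.e. array[-1] -> AIOOBE.
      static int arrayValidLength(int[] array) {
        int len = array.length;
        while (len > 0 && array[len - 1] == 0) {   // fixed: "len > 0"
          --len;
        }
        return len <= 0 ? 0 : len;
      }

      // Old test was "i == 0": the separator was appended before the first
      // element ("{,ab") instead of between elements ("{a,b").
      static String join(String[] items) {
        StringBuilder b = new StringBuilder("{");
        for (int i = 0; i < items.length; i++) {
          if (i != 0) {                            // fixed: "i != 0"
            b.append(",");
          }
          b.append(items[i]);
        }
        return b.append("}").toString();
      }

      public static void main(String[] args) {
        System.out.println(arrayValidLength(new int[] {0, 0, 0}));  // 0, no AIOOBE
        System.out.println(arrayValidLength(new int[] {7, 0, 0}));  // 1
        System.out.println(join(new String[] {"LONG", "DECIMAL"})); // {LONG,DECIMAL}
      }
    }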
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 842994e..e0bd512 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -344,7 +344,7 @@ private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, List<Ex
    */
   private List<ExprNodeDesc> getChildExpressionsWithImplicitCast(GenericUDF genericUDF,
-      List<ExprNodeDesc> children, TypeInfo returnType) {
+      List<ExprNodeDesc> children, TypeInfo returnType) throws HiveException {
 
     if (isExcludedFromCast(genericUDF)) {
 
       // No implicit cast needed
@@ -407,7 +407,8 @@ private TypeInfo updatePrecision(TypeInfo inputTypeInfo, DecimalTypeInfo returnT
    * The GenericUDFs might need their children output to be cast to the given castType.
    * This method returns a cast expression that would achieve the required casting.
    */
-  private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) {
+  private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType)
+      throws HiveException {
     TypeInfo inputTypeInfo = child.getTypeInfo();
     String inputTypeString = inputTypeInfo.getTypeName();
     String castTypeString = castType.getTypeName();
@@ -457,7 +458,7 @@ private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc chil
     return null;
   }
 
-  private GenericUDF getGenericUDFForCast(TypeInfo castType) {
+  private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException {
     UDF udfClass = null;
     GenericUDF genericUdf = null;
     switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
@@ -494,8 +495,14 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) {
     case BINARY:
       genericUdf = new GenericUDFToBinary();
       break;
+    case DECIMAL:
+      genericUdf = new GenericUDFToDecimal();
+      break;
     }
     if (genericUdf == null) {
+      if (udfClass == null) {
+        throw new HiveException("Could not add implicit cast for type "+castType.getTypeName());
+      }
       genericUdf = new GenericUDFBridge();
       ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName());
     }
@@ -713,10 +720,11 @@ private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<Ex
       return null;
     }
 
-    Class<? extends VectorExpression> vclass = this.vMap.getVectorExpressionClass(udf, builder.build());
+    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
+    Class<? extends VectorExpression> vclass = this.vMap.getVectorExpressionClass(udf, descriptor);
     if (vclass == null) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("No vector udf found for "+udf.getSimpleName());
+        LOG.debug("No vector udf found for "+udf.getSimpleName() + ", descriptor: "+descriptor);
       }
       return null;
     }
@@ -960,7 +968,7 @@ private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge ud
     } else if (isCastToFloatFamily(cl)) {
       return getCastToDoubleExpression(cl, childExpr, returnType);
     } else if (cl.equals(UDFToString.class)) {
-      return getCastToString(childExpr);
+      return getCastToString(childExpr, returnType);
     }
     return null;
   }
@@ -1028,7 +1036,7 @@ private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws Hi
     return d;
   }
 
-  private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
+  private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
       throws HiveException {
     String inputType = childExpr.get(0).getTypeString();
     if (inputType.equals("boolean")) {
@@ -1036,6 +1044,8 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
       return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, Mode.PROJECTION, null);
     } else if (isIntFamily(inputType)) {
       return createVectorExpression(CastLongToString.class, childExpr, Mode.PROJECTION, null);
+    } else if (isDecimalFamily(inputType)) {
+      return createVectorExpression(CastDecimalToString.class, childExpr, Mode.PROJECTION, returnType);
     }
     /* The string type is deliberately omitted -- the planner removes string to string casts.
      * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
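[Editor's aside] The new `udfClass == null` guard in getGenericUDFForCast turns what used to be an NPE at `udfClass.getClass()` into a descriptive HiveException when a cast type has neither a native GenericUDF nor a bridged legacy UDF. A trimmed-down, hypothetical sketch of that control flow (stand-in types and names, not the Hive classes):

    // Hypothetical sketch of the post-patch control flow in getGenericUDFForCast.
    class CastResolutionSketch {
      static class HiveException extends Exception {
        HiveException(String msg) { super(msg); }
      }

      static Object resolveCast(String castType) throws HiveException {
        Object udfClass = null;     // bridged (legacy) UDF, if any
        Object genericUdf = null;   // native GenericUDF, if any
        switch (castType) {
          case "boolean": udfClass = "UDFToBoolean"; break;
          case "decimal": genericUdf = "GenericUDFToDecimal"; break; // new case in the patch
          default: break;           // a type with no cast support at all
        }
        if (genericUdf == null) {
          if (udfClass == null) {
            // Before the patch this fell through to udfClass.getClass() -> NPE.
            throw new HiveException("Could not add implicit cast for type " + castType);
          }
          genericUdf = "bridge(" + udfClass + ")";
        }
        return genericUdf;
      }
    }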
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
index 3bc9493..6ba2ee0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
@@ -19,6 +19,7 @@
 
 import java.sql.Timestamp;
 
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -28,15 +29,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -287,6 +280,14 @@ private void setOutputCol(ColumnVector colVec, int i, Object value) {
       } else {
         lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
       }
+    } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
+      DecimalColumnVector dcv = (DecimalColumnVector) colVec;
+      if (value instanceof HiveDecimal) {
+        dcv.vector[i].update(((HiveDecimal) value).bigDecimalValue());
+      } else {
+        HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
+        dcv.vector[i].update(hd.bigDecimalValue());
+      }
     } else {
       throw new RuntimeException("Unhandled object type " + outputOI.getTypeName());
     }
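[Editor's aside] A sketch of how the new decimal branch is exercised, reusing only the calls that appear in the patch itself (`getPrimitiveJavaObject`, `bigDecimalValue`, `update` on the vector entry); the class and method names are hypothetical, and the column vector is passed in rather than constructed since the patch does not show its constructor.

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;

    // Sketch (assumes the 0.13-era APIs used in the patch): store a UDF result
    // into row i of a DecimalColumnVector, accepting either a plain HiveDecimal
    // or a writable that the ObjectInspector can unwrap.
    class DecimalOutputSketch {
      static void setDecimal(DecimalColumnVector dcv, WritableHiveDecimalObjectInspector oi,
          int i, Object value) {
        if (value instanceof HiveDecimal) {
          dcv.vector[i].update(((HiveDecimal) value).bigDecimalValue());
        } else {
          HiveDecimal hd = oi.getPrimitiveJavaObject(value);
          dcv.vector[i].update(hd.bigDecimalValue());
        }
      }
    }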
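[Editor's aside] The rewritten getVectorizationContext builds the column map from the operator's own RowSchema instead of the RowResolver, and appends partition keys only when a pruned partition list exists for the operator. A hypothetical pure-Java sketch of just that mapping logic, with plain strings standing in for Hive's ColumnInfo and FieldSchema:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Hypothetical sketch: non-virtual schema columns are numbered first,
    // then partition keys, but only when a pruned partition list was found.
    class ColumnMapSketch {
      static Map<String, Integer> buildColumnMap(List<String> schemaColumns,
          List<String> virtualColumns, List<String> partitionKeys) {
        Map<String, Integer> cmap = new HashMap<String, Integer>();
        int columnCount = 0;
        for (String c : schemaColumns) {
          if (!virtualColumns.contains(c)) {  // skip virtual columns
            cmap.put(c, columnCount++);
          }
        }
        if (partitionKeys != null) {          // null stands in for "no pruned partition list"
          for (String fs : partitionKeys) {
            cmap.put(fs, columnCount++);
          }
        }
        return cmap;
      }
    }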
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
index 54c665e..0010c1a 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -42,7 +43,7 @@
  *
  */
 @VectorizedExpressions({CastLongToBooleanViaLongToLong.class,
-  CastDoubleToBooleanViaDoubleToLong.class})
+  CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class})
 public class UDFToBoolean extends UDF {
 
   private final BooleanWritable booleanWritable = new BooleanWritable();
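[Editor's aside] @VectorizedExpressions is the hook the vectorizer uses to discover vectorized implementations of a row-mode UDF, so listing CastDecimalToBoolean.class is what makes the decimal-to-boolean cast eligible for vectorization. A simplified, self-contained mirror of that discovery (the annotation below is a local stand-in, not Hive's):

    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;
    import java.util.Arrays;

    // Simplified mirror of the @VectorizedExpressions lookup: the annotation
    // lists candidate vectorized classes, and the planner reads them reflectively.
    class AnnotationSketch {
      @Retention(RetentionPolicy.RUNTIME)
      @interface VectorizedExpressions { Class<?>[] value(); }

      @VectorizedExpressions({String.class, Integer.class})  // stand-in classes
      static class SomeUdf {}

      public static void main(String[] args) {
        VectorizedExpressions ann = SomeUdf.class.getAnnotation(VectorizedExpressions.class);
        System.out.println(Arrays.toString(ann.value()));    // candidate implementations
      }
    }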
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java
index db4eafa..9d283bd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java
@@ -21,18 +21,7 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloDoubleScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloLongScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleScalarModuloDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleScalarModuloLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloDoubleScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarModuloLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -49,7 +38,9 @@
   LongColModuloLongScalar.class, LongColModuloDoubleScalar.class,
   DoubleColModuloLongScalar.class, DoubleColModuloDoubleScalar.class,
   LongScalarModuloLongColumn.class, LongScalarModuloDoubleColumn.class,
-  DoubleScalarModuloLongColumn.class, DoubleScalarModuloDoubleColumn.class})
+  DoubleScalarModuloLongColumn.class, DoubleScalarModuloDoubleColumn.class,
+  DecimalColModuloDecimalColumn.class, DecimalColModuloDecimalScalar.class,
+  DecimalScalarModuloDecimalColumn.class})
 public class GenericUDFOPMod extends GenericUDFBaseNumeric {
 
   public GenericUDFOPMod() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
index e2529d2..ba4fed7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java
@@ -20,6 +20,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToTimestampViaDoubleToLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToTimestampViaLongToLong;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -39,7 +40,7 @@
  *
  */
 @VectorizedExpressions({CastLongToTimestampViaLongToLong.class,
-  CastDoubleToTimestampViaDoubleToLong.class})
+  CastDoubleToTimestampViaDoubleToLong.class, CastDecimalToTimestamp.class})
 public class GenericUDFTimestamp extends GenericUDF {
 
   private transient PrimitiveObjectInspector argumentOI;
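[Editor's aside] For reference on the new decimal modulo expressions registered above: decimal modulo keeps the sign of the dividend, the same behavior as java.math.BigDecimal.remainder in plain Java. A quick standalone check:

    import java.math.BigDecimal;

    // BigDecimal.remainder keeps the sign of the dividend, which is the
    // conventional SQL behavior for MOD on decimals.
    class DecimalModSketch {
      public static void main(String[] args) {
        System.out.println(new BigDecimal("7.5").remainder(new BigDecimal("2")));  // 1.5
        System.out.println(new BigDecimal("-7.5").remainder(new BigDecimal("2"))); // -1.5
      }
    }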