diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
index fc2a0e1970..9031d70a97 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
@@ -21,6 +21,7 @@
 import com.google.common.base.Splitter;
 import com.google.common.collect.Sets;
+import org.apache.hive.common.util.AnnotationUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.JavaUtils;
@@ -158,12 +159,22 @@ private FunctionInfo registerUDF(String functionName, FunctionType functionType,
       Class UDFClass, boolean isOperator, String displayName,
       FunctionResource... resources) {
     validateClass(UDFClass, UDF.class);
+    validateDescription(UDFClass);
     FunctionInfo fI = new FunctionInfo(functionType, displayName,
         new GenericUDFBridge(displayName, isOperator, UDFClass.getName()), resources);
     addFunction(functionName, fI);
     return fI;
   }
 
+  private void validateDescription(Class input) {
+    Description description = AnnotationUtils.getAnnotation(input, Description.class);
+    if (description == null) {
+      throw new RuntimeException("UDF Class " + input
+          + " does not have description. Please annotate the class with the "
+          + "org.apache.hadoop.hive.ql.exec.Description annotation and provide the description of the function.");
+    }
+  }
+
   public FunctionInfo registerGenericUDF(String functionName,
       Class genericUDFClass, FunctionResource... resources) {
     FunctionType functionType = isNative ? FunctionType.BUILTIN : FunctionType.TEMPORARY;
@@ -173,6 +184,7 @@ public FunctionInfo registerGenericUDF(String functionName,
   private FunctionInfo registerGenericUDF(String functionName, FunctionType functionType,
       Class genericUDFClass, FunctionResource... resources) {
     validateClass(genericUDFClass, GenericUDF.class);
+    validateDescription(genericUDFClass);
     FunctionInfo fI = new FunctionInfo(functionType, functionName,
         ReflectionUtil.newInstance(genericUDFClass, null), resources);
     addFunction(functionName, fI);
@@ -204,6 +216,7 @@ public FunctionInfo registerGenericUDTF(String functionName,
   private FunctionInfo registerGenericUDTF(String functionName, FunctionType functionType,
       Class genericUDTFClass, FunctionResource... resources) {
     validateClass(genericUDTFClass, GenericUDTF.class);
+    validateDescription(genericUDTFClass);
     FunctionInfo fI = new FunctionInfo(functionType, functionName,
         ReflectionUtil.newInstance(genericUDTFClass, null), resources);
     addFunction(functionName, fI);
@@ -218,6 +231,7 @@ public FunctionInfo registerGenericUDAF(String functionName,
   private FunctionInfo registerGenericUDAF(String functionName,
       FunctionType functionType, GenericUDAFResolver genericUDAFResolver,
       FunctionResource... resources) {
+    validateDescription(genericUDAFResolver.getClass());
     FunctionInfo function = new WindowFunctionInfo(functionType, functionName,
         genericUDAFResolver, resources);
     addFunction(functionName, function);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
index 511d9641c3..48645dc3f2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
@@ -31,7 +31,6 @@
 @Target(ElementType.TYPE)
 @Documented
 public @interface WindowFunctionDescription {
-  Description description ();
   /**
    * controls whether this function can be applied to a Window.
    *

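For context, here is a minimal sketch (not part of this patch; the package, class, and function names are hypothetical) of a custom UDF that satisfies the new Registry.validateDescription check. Without the @Description annotation, registration now fails with the RuntimeException added above.

package com.example.hive.udf; // hypothetical package, illustration only

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

// Removing this annotation would now make Registry.registerGenericUDF throw
// "UDF Class ... does not have description." at registration time.
@Description(name = "answer",
    value = "_FUNC_() - returns the constant 42",
    extended = "Example:\n  > SELECT _FUNC_();\n  42")
public class GenericUDFAnswer extends GenericUDF {
  private final IntWritable result = new IntWritable(42);

  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // No arguments to inspect; always returns an int.
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  }

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    return result;
  }

  @Override
  public String getDisplayString(String[] children) {
    return "answer()";
  }
}
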
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
index a7f4bf1fcc..e6c8021c19 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean;
@@ -50,6 +51,24 @@
 @VectorizedExpressions({CastLongToBooleanViaLongToLong.class, CastDateToBoolean.class,
     CastTimestampToBoolean.class, CastStringToBoolean.class,
     CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class, CastStringToLong.class})
+@Description(name = "boolean",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, float, double, decimal:\n" +
+        "  x == 0 -> false\n" +
+        "  x != 0 -> true\n" +
+        "- string:\n" +
+        "  x is '', 'false', 'no', 'zero', 'off' -> false\n" +
+        "  true otherwise\n" +
+        "- date: always NULL\n" +
+        "- timestamp\n" +
+        "  seconds or nanos are 0 -> false\n" +
+        "  true otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(0);\n" +
+        " false"
+)
 public class UDFToBoolean extends UDF {
   private final BooleanWritable booleanWritable = new BooleanWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java
index 8f4ec3b1ef..405f5e00aa 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -44,6 +45,23 @@
  */
 @VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class,
     CastDecimalToLong.class, CastStringToLong.class})
+@Description(name = "tinyint",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, float, double, decimal, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> integer part of x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1\n" +
+        "  false -> 0\n" +
+        "- string:\n" +
+        "  x is a valid integer -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1"
+)
 public class UDFToByte extends UDF {
   private final ByteWritable byteWritable = new ByteWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java
index 7a0145243d..a9e71a344e 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDouble;
@@ -42,6 +43,23 @@
  */
 @VectorizedExpressions({CastTimestampToDouble.class, CastLongToDouble.class,
     CastDecimalToDouble.class, CastStringToDouble.class})
+@Description(name = "double",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, float, double, decimal, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1.0\n" +
+        "  false -> 0.0\n" +
+        "- string:\n" +
+        "  x is a valid _FUNC_ -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1"
+)
 public class UDFToDouble extends UDF {
   private final DoubleWritable doubleWritable = new DoubleWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java
index 451b45fbbc..37179e59f2 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToFloat;
@@ -43,6 +44,23 @@
  */
 @VectorizedExpressions({CastTimestampToDouble.class, CastLongToFloatViaLongToDouble.class,
     CastDecimalToFloat.class, CastStringToFloat.class})
+@Description(name = "float",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, float, double, decimal, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1.0\n" +
+        "  false -> 0.0\n" +
+        "- string:\n" +
+        "  x is a valid _FUNC_ -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1.0"
+)
 public class UDFToFloat extends UDF {
   private final FloatWritable floatWritable = new FloatWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java
index 4fe9c323cc..0d07fcb0b8 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -46,6 +47,23 @@
  */
 @VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class,
     CastDecimalToLong.class, CastStringToLong.class})
+@Description(name = "int",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> integer part of x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1\n" +
+        "  false -> 0\n" +
+        "- string:\n" +
+        "  x is a valid integer -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1"
+)
 public class UDFToInteger extends UDF {
   private final IntWritable intWritable = new IntWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java
index b31eeb08a0..ebd1fa464f 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -44,6 +45,23 @@
  */
 @VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class,
     CastDecimalToLong.class, CastStringToLong.class})
+@Description(name = "bigint",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> integer part of x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1\n" +
+        "  false -> 0\n" +
+        "- string:\n" +
+        "  x is a valid integer -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1"
+)
 public class UDFToLong extends UDF {
   private final LongWritable longWritable = new LongWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java
index 315789c1c1..2146b2d081 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong;
@@ -44,6 +45,23 @@
  */
 @VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class,
     CastDecimalToLong.class, CastStringToLong.class})
+@Description(name = "smallint",
+    value = "_FUNC_(x) - converts its parameter to _FUNC_",
+    extended =
+        "- x is NULL -> NULL\n" +
+        "- byte, short, integer, long, timestamp:\n" +
+        "  x fits into the type _FUNC_ -> integer part of x\n" +
+        "  undefined otherwise\n" +
+        "- boolean:\n" +
+        "  true -> 1\n" +
+        "  false -> 0\n" +
+        "- string:\n" +
+        "  x is a valid integer -> x\n" +
+        "  NULL otherwise\n" +
+        "Example:\n " +
+        " > SELECT _FUNC_(true);\n" +
+        " 1"
+)
 public class UDFToShort extends UDF {
   ShortWritable shortWritable = new ShortWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index a8bcc972bb..93e54111c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -21,6 +21,7 @@
 import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -51,6 +52,7 @@
 /**
  * Generic UDF to generate Bloom Filter
  */
+@Description(name = "bloom_filter")
 public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
index 70541fe565..347788a9e4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
@@ -30,16 +30,16 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.IntWritable;
 
+@Description(
+  name = "cume_dist",
+  value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " +
+          "statistical books) computes the position of a specified value relative to a set of values. " +
+          "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " +
+          "CUME_DIST(x) = number of values in S coming before " +
+          " and including x in the specified order/ N"
+)
 @WindowFunctionDescription
 (
-  description = @Description(
-    name = "cume_dist",
-    value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " +
-        "statistical books) computes the position of a specified value relative to a set of values. " +
-        "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " +
-        "CUME_DIST(x) = number of values in S coming before " +
-        " and including x in the specified order/ N"
-  ),
   supportsWindow = false,
   pivotResult = true,
   rankingFunction = true,
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
index 30bfd2bb8c..b773bdf369 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
@@ -21,15 +21,15 @@
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
 
+@Description(
+  name = "dense_rank",
+  value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " +
+          "gaps in ranking sequence when there are ties. That is, if you were " +
+          "ranking a competition using DENSE_RANK and had three people tie for " +
+          "second place, you would say that all three were in second place and " +
+          "that the next person came in third."
+)
 @WindowFunctionDescription(
-  description = @Description(
-    name = "dense_rank",
-    value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " +
-        "gaps in ranking sequence when there are ties. That is, if you were " +
-        "ranking a competition using DENSE_RANK and had three people tie for " +
-        "second place, you would say that all three were in second place and " +
-        "that the next person came in third."
-  ),
   supportsWindow = false,
   pivotResult = true,
   rankingFunction = true,
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
index b8b7d8e6da..9dc6252228 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
@@ -41,11 +41,11 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+@Description(
+  name = "first_value",
+  value = "_FUNC_(x)"
+)
 @WindowFunctionDescription(
-  description = @Description(
-    name = "first_value",
-    value = "_FUNC_(x)"
-  ),
   supportsWindow = true,
   pivotResult = false,
   impliesOrder = true
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
index e0edbb42af..7496cf7372 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
@@ -32,12 +32,11 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLead.LeadBuffer;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLeadLag.GenericUDAFLeadLagEvaluator;
 
-@WindowFunctionDescription
-(
-  description = @Description(
-    name = "lag",
-    value = "_FUNC_(expr, amt, default)"
-  ),
+@Description(
+  name = "lag",
+  value = "_FUNC_(expr, amt, default)"
+)
+@WindowFunctionDescription(
   supportsWindow = false,
   pivotResult = true,
   impliesOrder = true
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
index dadec3b793..2f40183e67 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
@@ -37,7 +37,8 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
-@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"),
+@Description(name = "last_value", value = "_FUNC_(x)")
+@WindowFunctionDescription(
   supportsWindow = true, pivotResult = false, impliesOrder = true)
 public class GenericUDAFLastValue extends AbstractGenericUDAFResolver {
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
index e678278b8b..31c9d630a7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
@@ -28,12 +28,11 @@
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
 
-@WindowFunctionDescription
-(
-  description = @Description(
-    name = "lead",
-    value = "_FUNC_(expr, amt, default)"
-  ),
+@Description(
+  name = "lead",
+  value = "_FUNC_(expr, amt, default)"
+)
+@WindowFunctionDescription(
   supportsWindow = false,
   pivotResult = true,
   impliesOrder = true
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java
index 8b2812d5bc..73a4876841 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFNTile.java
@@ -38,14 +38,14 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.IntWritable;
 
+@Description(
+  name = "rank",
+  value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other " +
+          "common summary statistics. This function divides an ordered partition into a " +
+          "specified number of groups called buckets and assigns a bucket number to each row " +
+          "in the partition."
+)
 @WindowFunctionDescription(
-  description = @Description(
-    name = "rank",
-    value = "_FUNC_(x) NTILE allows easy calculation of tertiles, quartiles, deciles and other "
-        +"common summary statistics. This function divides an ordered partition into a "
-        + "specified number of groups called buckets and assigns a bucket number to each row "
-        + "in the partition."
-  ),
   supportsWindow = false,
   pivotResult = true
 )
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
index 1a7c94431b..806aeb581d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
@@ -31,13 +31,13 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.IntWritable;
 
+@Description(
+  name = "percent_rank",
+  value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " +
+          "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " +
+          "(rank of row in its partition - 1) / (number of rows in the partition - 1)"
+)
 @WindowFunctionDescription(
-  description = @Description(
-    name = "percent_rank",
-    value = "_FUNC_(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather " +
-        "than row counts in its numerator. PERCENT_RANK of a row is calculated as: " +
-        "(rank of row in its partition - 1) / (number of rows in the partition - 1)"
-  ),
   supportsWindow = false,
   pivotResult = true,
   rankingFunction = true,
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
index e7e4fda6ea..00e17f92da 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
@@ -55,11 +55,11 @@
 /**
  * GenericUDAFPercentileCont.
  */
+@Description(
+  name = "dense_rank",
+  value = "_FUNC_(input, pc) " +
+          "- Returns the percentile of expr at pc (range: [0,1]).")
 @WindowFunctionDescription(
-  description = @Description(
-    name = "dense_rank",
-    value = "_FUNC_(input, pc) "
-        + "- Returns the percentile of expr at pc (range: [0,1])."),
   supportsWindow = false,
   pivotResult = true,
   supportsWithinGroup = true)
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
index d7c295cb11..21580f79cb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
@@ -33,11 +33,11 @@
 /**
  * GenericUDAFPercentileDisc.
  */
+@Description(
+  name = "dense_rank",
+  value = "_FUNC_(input, pc) - " +
+          "Returns the percentile of expr at pc (range: [0,1]) without interpolation.")
 @WindowFunctionDescription(
-  description = @Description(
-    name = "dense_rank",
-    value = "_FUNC_(input, pc) - "
-        + "Returns the percentile of expr at pc (range: [0,1]) without interpolation."),
   supportsWindow = false,
   pivotResult = true,
   supportsWithinGroup = true)
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
index a28def73a1..13e2f537cd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
@@ -38,10 +38,10 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.IntWritable;
 
+@Description(
+  name = "rank",
+  value = "_FUNC_(x)")
 @WindowFunctionDescription(
-  description = @Description(
-    name = "rank",
-    value = "_FUNC_(x)"),
   supportsWindow = false,
   pivotResult = true,
   rankingFunction = true,
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java
index 41a3e582ec..1a36d92e97 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRowNumber.java
@@ -37,12 +37,12 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.IntWritable;
 
+@Description(
+  name = "row_number",
+  value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting " +
+          "from 1, as defined by ORDER BY) to each row within the partition."
+)
 @WindowFunctionDescription(
-  description = @Description(
-    name = "row_number",
-    value = "_FUNC_() - The ROW_NUMBER function assigns a unique number (sequentially, starting "
-        + "from 1, as defined by ORDER BY) to each row within the partition."
-  ),
   supportsWindow = false,
   pivotResult = true
 )
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAssertTrueOOM.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAssertTrueOOM.java
index c5c73835af..c25a8adb92 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAssertTrueOOM.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAssertTrueOOM.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.udf.generic;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -34,6 +35,9 @@
 import org.apache.hadoop.io.BooleanWritable;
 
 @UDFType(deterministic = false)
+@Description(name = "assert_true_oom",
+    value = "_FUNC_(condition) - " +
+        "Throw a MapJoinMemoryExhaustionError if 'condition' is not true.")
 public class GenericUDFAssertTrueOOM extends GenericUDF {
   private ObjectInspectorConverters.Converter conditionConverter = null;
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBucketNumber.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBucketNumber.java
index 472cc85047..c3d2ea5852 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBucketNumber.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBucketNumber.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.UDFType;
@@ -25,6 +26,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
 @UDFType(deterministic = false)
+@Description(name = "bucket_number",
+    value = "_FUNC_(x) - returns NULL")
 public class GenericUDFBucketNumber extends GenericUDF{
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEpochMilli.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEpochMilli.java
index d8e822ae97..58bd86d645 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEpochMilli.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEpochMilli.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -29,6 +30,11 @@
 /**
  * GenericUDFEpochMilli.
  */
+@Description(name = "to_epoch_milli",
+    value = "_FUNC_(timestamp) - Converts the specified timestamp to number of milliseconds since 1970-01-01",
+    extended = "Example:\n" +
+        "  > SELECT _FUNC_(cast('2012-02-11 04:30:00' as timestamp));" +
+        "1328934600000")
 public class GenericUDFEpochMilli extends GenericUDF {
   private transient final LongWritable result = new LongWritable();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java
index 733fe63e80..332c5c8d5c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInBloomFilter.java
@@ -21,6 +21,7 @@
 import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -45,6 +46,7 @@
  * GenericUDF to lookup a value in BloomFilter
  */
 @VectorizedExpressions({VectorInBloomFilterColDynamicValue.class})
+@Description(name = "in_bloom_filter")
 public class GenericUDFInBloomFilter extends GenericUDF {
   private transient ObjectInspector valObjectInspector;
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSurrogateKey.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSurrogateKey.java
index 1372b60724..768359ff2a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSurrogateKey.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSurrogateKey.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
@@ -36,6 +37,12 @@
  * The return value is sequence within a query with a unique staring point based on write_id and task_id
  */
 @UDFType(deterministic = false)
+@Description(name = "surrogate_key", value = "_FUNC_() - " +
+    "automatically generate numerical Ids for rows as you enter data into a table",
+    extended = "Example: CREATE TABLE students \n" +
+        "(`ID` BIGINT DEFAULT SURROGATE_KEY(),\n" +
+        " name VARCHAR(64));"
+)
 public class GenericUDFSurrogateKey extends GenericUDF {
   private static final int DEFAULT_WRITE_ID_BITS = 24;
   private static final int DEFAULT_TASK_ID_BITS = 16;
diff --git ql/src/test/queries/clientpositive/desc_function.q ql/src/test/queries/clientpositive/desc_function.q
index d055d9ca03..8d07d7dc9e 100644
--- ql/src/test/queries/clientpositive/desc_function.q
+++ ql/src/test/queries/clientpositive/desc_function.q
@@ -3,3 +3,12 @@
 DESC FUNCTION replace;
 EXPLAIN DESC FUNCTION EXTENDED replace;
 DESC FUNCTION EXTENDED replace;
+
+DESCRIBE FUNCTION dense_rank;
+DESCRIBE FUNCTION EXTENDED dense_rank;
+
+DESCRIBE FUNCTION surrogate_key;
+DESCRIBE FUNCTION EXTENDED surrogate_key;
+
+DESCRIBE FUNCTION boolean;
+DESCRIBE FUNCTION EXTENDED boolean;
diff --git ql/src/test/results/clientpositive/desc_function.q.out ql/src/test/results/clientpositive/desc_function.q.out
index 1f804bba60..986b687a1d 100644
--- ql/src/test/results/clientpositive/desc_function.q.out
+++ ql/src/test/results/clientpositive/desc_function.q.out
@@ -51,3 +51,57 @@ Example:
   'BLack and BLue'
 Function class:org.apache.hadoop.hive.ql.udf.UDFReplace
 Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION dense_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION dense_rank
+POSTHOOK: type: DESCFUNCTION
+dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank
+POSTHOOK: type: DESCFUNCTION
+dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank
+Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION surrogate_key
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION surrogate_key
+POSTHOOK: type: DESCFUNCTION
+surrogate_key() - automatically generate numerical Ids for rows as you enter data into a table
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED surrogate_key
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED surrogate_key
+POSTHOOK: type: DESCFUNCTION
+surrogate_key() - automatically generate numerical Ids for rows as you enter data into a table
+Example: CREATE TABLE students 
+(`ID` BIGINT DEFAULT SURROGATE_KEY(),
+ name VARCHAR(64));
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFSurrogateKey
+Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION boolean
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION boolean
+POSTHOOK: type: DESCFUNCTION
+boolean(x) - converts its parameter to boolean
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED boolean
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED boolean
+POSTHOOK: type: DESCFUNCTION
+boolean(x) - converts its parameter to boolean
+Synonyms: udftoboolean
+- x is NULL -> NULL
+- byte, short, integer, long, float, double, decimal:
+  x == 0 -> false
+  x != 0 -> true
+- string:
+  x is '', 'false', 'no', 'zero', 'off' -> false
+  true otherwise
+- date: always NULL
+- timestamp
+  seconds or nanos are 0 -> false
+  true otherwise
+Example: 
+ > SELECT boolean(0);
+ false
+Function class:org.apache.hadoop.hive.ql.udf.UDFToBoolean
+Function type:BUILTIN
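With the description() member removed from @WindowFunctionDescription, a window function now carries two separate annotations. Below is a minimal sketch of the new layout for a hypothetical resolver (the package, class, and function names are invented for illustration, and the evaluator is deliberately stubbed out); it is not part of this patch.

package com.example.hive.udaf; // hypothetical package, illustration only

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

// The human-readable text now lives in @Description (which the new
// Registry.validateDescription check requires), while
// @WindowFunctionDescription keeps only the behavioural flags.
@Description(name = "my_rank",
    value = "_FUNC_(x) - hypothetical ranking function used only to illustrate the annotation split")
@WindowFunctionDescription(
    supportsWindow = false,
    pivotResult = true,
    rankingFunction = true)
public class GenericUDAFMyRank extends AbstractGenericUDAFResolver {

  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    // A real window function would construct and return its evaluator here;
    // this sketch only demonstrates the annotation layout.
    throw new SemanticException("my_rank is an illustration only and has no evaluator");
  }
}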