Replace the hard-coded UDAFS_IMPLY_ORDER and RANKING_FUNCS name lists with annotation
attributes (WindowFunctionDescription.rankingFunction / impliesOrder, UDFType.impliesOrder).
FunctionRegistry.impliesOrder and FunctionRegistry.isRankingFunction read those attributes and
return false for null or unregistered function names instead of throwing.

NOTE(review): indentation and generic type arguments of context/removed lines were
reconstructed; verify against revision 1507762 or apply with whitespace-insensitive
matching (e.g. patch -l) if a hunk is rejected.

Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(working copy)
@@ -184,11 +184,6 @@
 
   static Map<String, WindowFunctionInfo> windowFunctions = Collections.synchronizedMap(new LinkedHashMap<String, WindowFunctionInfo>());
 
-  /*
-   * UDAFS that only work when the input rows have an order.
-   */
-  public static final HashSet<String> UDAFS_IMPLY_ORDER = new HashSet<String>();
-
   static {
     registerUDF("concat", UDFConcat.class, false);
     registerUDF("substr", UDFSubstr.class, false);
@@ -442,15 +437,6 @@
     registerWindowFunction(LEAD_FUNC_NAME, new GenericUDAFLead(), false);
     registerWindowFunction(LAG_FUNC_NAME, new GenericUDAFLag(), false);
 
-    UDAFS_IMPLY_ORDER.add("rank");
-    UDAFS_IMPLY_ORDER.add("dense_rank");
-    UDAFS_IMPLY_ORDER.add("percent_rank");
-    UDAFS_IMPLY_ORDER.add("cume_dist");
-    UDAFS_IMPLY_ORDER.add(LEAD_FUNC_NAME);
-    UDAFS_IMPLY_ORDER.add(LAG_FUNC_NAME);
-    UDAFS_IMPLY_ORDER.add("first_value");
-    UDAFS_IMPLY_ORDER.add("last_value");
-
     registerTableFunction(NOOP_TABLE_FUNCTION, NoopResolver.class);
     registerTableFunction(NOOP_MAP_TABLE_FUNCTION, NoopWithMapResolver.class);
     registerTableFunction(WINDOWING_TABLE_FUNCTION, WindowingTableFunctionResolver.class);
@@ -1431,8 +1417,30 @@
     return windowFunctions.get(name.toLowerCase());
   }
 
+  /**
+   * Reports whether a function only makes sense over ordered input rows. Both GenericUDFs (through
+   * the UDFType annotation) and window functions (through WindowFunctionDescription) can declare it.
+   *
+   * @param functionName
+   *          name of the function, matched case-insensitively; may be null
+   * @return true if a GenericUDF or window function registered under this name declares
+   *         impliesOrder, false otherwise (including for a null or unknown name).
+   */
   public static boolean impliesOrder(String functionName) {
-    return functionName == null ? false : UDAFS_IMPLY_ORDER.contains(functionName.toLowerCase());
+    if (functionName == null) {
+      // The lookup-table implementation tolerated null; keep that contract.
+      return false;
+    }
+    String lowerName = functionName.toLowerCase();
+    FunctionInfo info = mFunctions.get(lowerName);
+    if (info != null && info.isGenericUDF()) {
+      UDFType type = info.getGenericUDF().getClass().getAnnotation(UDFType.class);
+      if (type != null && type.impliesOrder()) {
+        return true;
+      }
+    }
+    WindowFunctionInfo windowInfo = windowFunctions.get(lowerName);
+    return windowInfo != null && windowInfo.isImpliesOrder();
   }
 
   static void registerHiveUDAFsAsWindowFunctions()
@@ -1480,4 +1488,33 @@
     mFunctions.put(name.toLowerCase(), tInfo);
   }
 
+  /**
+   * Checks whether a function is a ranking function, i.e. a UDAF whose resolver class carries a
+   * WindowFunctionDescription annotation with rankingFunction set.
+   *
+   * @param name
+   *          name of a function, matched case-insensitively; may be null
+   * @return true if the function is a UDAF, has a WindowFunctionDescription annotation and the
+   *         annotation marks it as a ranking function; false otherwise (including for a null or
+   *         unregistered name)
+   */
+  public static boolean isRankingFunction(String name) {
+    if (name == null) {
+      return false;
+    }
+    FunctionInfo info = mFunctions.get(name.toLowerCase());
+    if (info == null) {
+      // Unknown names are simply not ranking functions; do not fail with an NPE.
+      return false;
+    }
+    GenericUDAFResolver res = info.getGenericUDAFResolver();
+    if (res != null) {
+      WindowFunctionDescription desc = res.getClass().getAnnotation(WindowFunctionDescription.class);
+      if (desc != null) {
+        return desc.rankingFunction();
+      }
+    }
+    return false;
+  }
+
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java	(working copy)
@@ -50,5 +50,17 @@
    * for all the rows.
    */
   boolean pivotResult() default false;
+
+  /**
+   * Used in the translation process to validate arguments
+   * @return true if ranking function
+   */
+  boolean rankingFunction() default false;
+
+  /**
+   * Used in analytical functions to specify that the UDF implies an ordering
+   * @return true if the function implies order
+   */
+  boolean impliesOrder() default false;
 }
 
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java	(working copy)
@@ -18,8 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
 
 @SuppressWarnings("deprecation")
@@ -27,6 +25,7 @@
 {
   boolean supportsWindow = true;
   boolean pivotResult = false;
+  boolean impliesOrder = false;
   FunctionInfo fInfo;
 
   WindowFunctionInfo(FunctionInfo fInfo)
@@ -39,6 +38,7 @@
     {
       supportsWindow = def.supportsWindow();
       pivotResult = def.pivotResult();
+      impliesOrder = def.impliesOrder();
     }
   }
 
@@ -52,6 +52,10 @@
     return pivotResult;
   }
 
+  public boolean isImpliesOrder(){
+    return impliesOrder;
+  }
+
   public FunctionInfo getfInfo()
   {
     return fInfo;
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java	(working copy)
@@ -425,8 +425,7 @@
       }
     }
 
-    if (RANKING_FUNCS.contains(spec.getName()))
-    {
+    if (FunctionRegistry.isRankingFunction(spec.getName())){
       setupRankingArgs(wdwTFnDef, def, spec);
     }
 
@@ -785,19 +784,6 @@
     return combinedOrdExprs;
   }
 
-
-  /*
-   * Ranking Functions helpers
-   */
-
-  protected static final ArrayList<String> RANKING_FUNCS = new ArrayList<String>();
-  static {
-    RANKING_FUNCS.add("rank");
-    RANKING_FUNCS.add("dense_rank");
-    RANKING_FUNCS.add("percent_rank");
-    RANKING_FUNCS.add("cume_dist");
-  };
-
   private void setupRankingArgs(WindowTableFunctionDef wdwTFnDef,
       WindowFunctionDef wFnDef,
       WindowFunctionSpec wSpec)
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -72,8 +72,6 @@
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
-import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
-import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java	(working copy)
@@ -34,4 +34,9 @@
   boolean deterministic() default true;
   boolean stateful() default false;
   boolean distinctLike() default false;
+  /**
+   *
+   * @return true if udf implies window ordering
+   */
+  boolean impliesOrder() default false;
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java	(working copy)
@@ -43,7 +43,9 @@
         " and including x in the specified order/ N"
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    rankingFunction = true,
+    impliesOrder = true
 )
 public class GenericUDAFCumeDist extends GenericUDAFRank
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java	(working copy)
@@ -34,7 +34,9 @@
         "that the next person came in third."
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    rankingFunction = true,
+    impliesOrder = true
 )
 public class GenericUDAFDenseRank extends GenericUDAFRank
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java	(working copy)
@@ -41,7 +41,8 @@
         value = "_FUNC_(x)"
     ),
     supportsWindow = true,
-    pivotResult = false
+    pivotResult = false,
+    impliesOrder = true
 )
 public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLag.java	(working copy)
@@ -33,7 +33,8 @@
         value = "_FUNC_(expr, amt, default)"
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    impliesOrder = true
 )
 public class GenericUDAFLag extends GenericUDAFLeadLag
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java	(working copy)
@@ -34,7 +34,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
-@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false)
+@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true)
 public class GenericUDAFLastValue extends AbstractGenericUDAFResolver {
 
   static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLead.java	(working copy)
@@ -33,7 +33,8 @@
         value = "_FUNC_(expr, amt, default)"
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    impliesOrder = true
 )
 public class GenericUDAFLead extends GenericUDAFLeadLag
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java	(working copy)
@@ -40,7 +40,9 @@
         "(rank of row in its partition - 1) / (number of rows in the partition - 1)"
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    rankingFunction = true,
+    impliesOrder = true
 )
 public class GenericUDAFPercentRank extends GenericUDAFRank
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java	(working copy)
@@ -44,7 +44,9 @@
         value = "_FUNC_(x)"
     ),
     supportsWindow = false,
-    pivotResult = true
+    pivotResult = true,
+    rankingFunction = true,
+    impliesOrder = true
 )
 public class GenericUDAFRank extends AbstractGenericUDAFResolver
 {
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java	(revision 1507762)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
@@ -197,6 +198,7 @@
 
   protected abstract int getIndex(int amt);
 
+  @UDFType(impliesOrder = true)
   public static class GenericUDFLead extends GenericUDFLeadLag
   {
 
@@ -219,6 +221,7 @@
 
   }
 
+  @UDFType(impliesOrder = true)
   public static class GenericUDFLag extends GenericUDFLeadLag
   {
     @Override
Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java	(revision 1507762)
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java	(working copy)
@@ -22,6 +22,7 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import junit.framework.Assert;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -146,4 +147,29 @@
   @Override
   protected void tearDown() {
   }
+
+  public void testIsRankingFunction() {
+    Assert.assertTrue(FunctionRegistry.isRankingFunction("rank"));
+    Assert.assertTrue(FunctionRegistry.isRankingFunction("dense_rank"));
+    Assert.assertTrue(FunctionRegistry.isRankingFunction("percent_rank"));
+    Assert.assertTrue(FunctionRegistry.isRankingFunction("cume_dist"));
+    Assert.assertFalse(FunctionRegistry.isRankingFunction("min"));
+    Assert.assertFalse(FunctionRegistry.isRankingFunction("no_such_function"));
+    Assert.assertFalse(FunctionRegistry.isRankingFunction(null));
+  }
+
+  public void testImpliesOrder() {
+    Assert.assertTrue(FunctionRegistry.impliesOrder("rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("first_value"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("last_value"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("lead"));
+    Assert.assertTrue(FunctionRegistry.impliesOrder("lag"));
+    Assert.assertFalse(FunctionRegistry.impliesOrder("min"));
+    Assert.assertFalse(FunctionRegistry.impliesOrder("no_such_function"));
+    Assert.assertFalse(FunctionRegistry.impliesOrder(null));
+  }
+
 }