diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 1ba1b43..9689b4e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -137,7 +137,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStdSample; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; @@ -167,6 +169,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode; +import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo; import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath; import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean; import org.apache.hadoop.hive.ql.udf.xml.UDFXPathDouble; @@ -627,10 +630,13 @@ public final class FunctionRegistry { * @param argumentTypeInfos * @return The UDAF evaluator */ + @SuppressWarnings("deprecation") public static GenericUDAFEvaluator getGenericUDAFEvaluator(String name, - List argumentTypeInfos) throws SemanticException { - GenericUDAFResolver udaf = getGenericUDAFResolver(name); - if (udaf == null) { + List argumentTypeInfos, boolean isDistinct, + boolean isAllColumns) throws SemanticException { + + GenericUDAFResolver udafResolver = getGenericUDAFResolver(name); + if (udafResolver == null) { return null; } @@ -638,7 +644,19 @@ public final class FunctionRegistry { for (int i = 0; i < parameters.length; i++) { parameters[i] = argumentTypeInfos.get(i); } - return udaf.getEvaluator(parameters); + + GenericUDAFEvaluator udafEvaluator = null; + if (udafResolver instanceof GenericUDAFResolver2) { + GenericUDAFParameterInfo paramInfo = + new SimpleGenericUDAFParameterInfo( + parameters, isDistinct, isAllColumns); + udafEvaluator = + ((GenericUDAFResolver2) udafResolver).getEvaluator(paramInfo); + } else { + udafEvaluator = udafResolver.getEvaluator(parameters); + } + return udafEvaluator; + } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g index 25fda1a..538a11a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g @@ -27,6 +27,7 @@ TOK_ALLCOLREF; TOK_TABLE_OR_COL; TOK_FUNCTION; TOK_FUNCTIONDI; +TOK_FUNCTIONSTAR; TOK_WHERE; TOK_OP_EQ; TOK_OP_NE; @@ -180,15 +181,15 @@ catch (RecognitionException e) { // starting rule statement - : explainStatement EOF - | execStatement EOF - ; + : explainStatement EOF + | execStatement EOF + ; explainStatement @init { msgs.push("explain statement"); } @after { msgs.pop(); } - : KW_EXPLAIN (isExtended=KW_EXTENDED)? execStatement -> ^(TOK_EXPLAIN execStatement $isExtended?) - ; + : KW_EXPLAIN (isExtended=KW_EXTENDED)? execStatement -> ^(TOK_EXPLAIN execStatement $isExtended?) 
+ ; execStatement @init { msgs.push("statement"); } @@ -393,16 +394,16 @@ alterStatementSuffixSerdeProperties alterStatementSuffixFileFormat @init {msgs.push("alter fileformat statement"); } @after {msgs.pop(); } - :name=Identifier KW_SET KW_FILEFORMAT fileFormat - -> ^(TOK_ALTERTABLE_FILEFORMAT $name fileFormat) - ; + :name=Identifier KW_SET KW_FILEFORMAT fileFormat + -> ^(TOK_ALTERTABLE_FILEFORMAT $name fileFormat) + ; alterStatementSuffixClusterbySortby @init {msgs.push("alter cluster by sort by statement");} @after{msgs.pop();} - :name=Identifier tableBuckets - ->^(TOK_ALTERTABLE_CLUSTER_SORT $name tableBuckets) - ; + :name=Identifier tableBuckets + ->^(TOK_ALTERTABLE_CLUSTER_SORT $name tableBuckets) + ; fileFormat @init { msgs.push("file format specification"); } @@ -877,7 +878,7 @@ selectClause @init { msgs.push("select clause"); } @after { msgs.pop(); } : - KW_SELECT hintClause? (((KW_ALL | dist=KW_DISTINCT)? selectList) + KW_SELECT hintClause? (((KW_ALL | dist=KW_DISTINCT)? selectList) | (transform=KW_TRANSFORM selectTrfmClause)) -> {$transform == null && $dist == null}? ^(TOK_SELECT hintClause? selectList) -> {$transform == null && $dist != null}? ^(TOK_SELECTDI hintClause? selectList) @@ -892,7 +893,7 @@ selectList : selectItem ( COMMA selectItem )* -> selectItem+ ; - + selectTrfmClause @init { msgs.push("transform clause"); } @after { msgs.pop(); } @@ -1061,9 +1062,9 @@ joinToken lateralView @init {msgs.push("lateral view"); } @after {msgs.pop(); } - : - KW_LATERAL KW_VIEW function tableAlias KW_AS Identifier (COMMA Identifier)* -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function Identifier+ tableAlias))) - ; + : + KW_LATERAL KW_VIEW function tableAlias KW_AS Identifier (COMMA Identifier)* -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function Identifier+ tableAlias))) + ; tableAlias @init {msgs.push("table alias"); } @@ -1180,10 +1181,13 @@ function : functionName LPAREN - (dist=KW_DISTINCT)? - (expression (COMMA expression)*)? - RPAREN -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?) - -> ^(TOK_FUNCTIONDI functionName (expression+)?) + ( + (star=STAR) + | (dist=KW_DISTINCT)? (expression (COMMA expression)*)? + ) + RPAREN -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName) + -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?) + -> ^(TOK_FUNCTIONDI functionName (expression+)?) 
; functionName diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a2f9dba..3797971 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -313,8 +313,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { */ private void doPhase1GetAllAggregations(ASTNode expressionTree, HashMap aggregations) { - if (expressionTree.getToken().getType() == HiveParser.TOK_FUNCTION - || expressionTree.getToken().getType() == HiveParser.TOK_FUNCTIONDI) { + int exprTokenType = expressionTree.getToken().getType(); + if (exprTokenType == HiveParser.TOK_FUNCTION + || exprTokenType == HiveParser.TOK_FUNCTIONDI + || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { assert (expressionTree.getChildCount() != 0); if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { String functionName = unescapeIdentifier(expressionTree.getChild(0) @@ -1709,7 +1711,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0); GenericUDTF genericUDTF = null; - if (udtfExpr.getType() == HiveParser.TOK_FUNCTION) { + int udtfExprType = udtfExpr.getType(); + if (udtfExprType == HiveParser.TOK_FUNCTION + || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) { String funcName = TypeCheckProcFactory.DefaultExprProcessor .getFunctionText(udtfExpr, true); FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName); @@ -1918,11 +1922,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * for each GroupBy aggregation. */ static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, - ArrayList aggParameters, ASTNode aggTree) + ArrayList aggParameters, ASTNode aggTree, + boolean isDistinct, boolean isAllColumns) throws SemanticException { ArrayList originalParameterTypeInfos = getTypeInfo(aggParameters); GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( - aggName, originalParameterTypeInfos); + aggName, originalParameterTypeInfos, isDistinct, isAllColumns); if (null == result) { String reason = "Looking for UDAF Evaluator\"" + aggName + "\" with parameters " + originalParameterTypeInfos; @@ -2066,9 +2071,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( - aggName, aggParameters, value); + aggName, aggParameters, value, isDistinct, isAllColumns); assert (genericUDAFEvaluator != null); GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); @@ -2188,12 +2194,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { .getIsPartitionCol())); } boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = null; // For distincts, partial aggregations have not been done if (distPartAgg) { genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, - value); + value, isDistinct, isAllColumns); assert (genericUDAFEvaluator != null); genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); } else { @@ -2305,10 +2312,11 
@@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( - aggName, aggParameters, value); + aggName, aggParameters, value, isDistinct, isAllColumns); assert (genericUDAFEvaluator != null); GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); @@ -2591,6 +2599,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { String aggName = value.getChild(0).getText(); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators .get(entry.getKey()); @@ -3183,8 +3192,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); - qbm.setDPCtx(dest, dpCtx); - } + qbm.setDPCtx(dest, dpCtx); + } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP // TODO: we should support merge files for dynamically generated partitions later @@ -3202,7 +3211,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } if (dpCtx.getSPPath() != null) { dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath()); - } + } if ((dest_tab.getNumBuckets() > 0) && (conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) { dpCtx.setNumBuckets(dest_tab.getNumBuckets()); @@ -3418,10 +3427,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { queryTmpdir, table_desc, conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), - currentTableId, - rsCtx.isMultiFileSpray(), - rsCtx.getNumFiles(), - rsCtx.getTotalFiles(), + currentTableId, + rsCtx.isMultiFileSpray(), + rsCtx.getNumFiles(), + rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx), fsRS, input), inputRR); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDAFResolver.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDAFResolver.java new file mode 100644 index 0000000..df7efba --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDAFResolver.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * An abstract class to help facilitate existing implementations of + * GenericUDAFResolver to migrate towards the newly introduced + * interface {@link GenericUDAFResolver2}. This class provides a default + * implementation of this new API and in turn calls + * the existing API {@link GenericUDAFResolver#getEvaluator(TypeInfo[])} by + * ignoring the extra parameter information available via the + * GenericUDAFParameterInfo interface. + * + */ +public abstract class AbstractGenericUDAFResolver + implements GenericUDAFResolver2 +{ + + @SuppressWarnings("deprecation") + @Override + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) + throws SemanticException { + return getEvaluator(info.getParameters()); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java index 0ef4734..71f11a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java @@ -45,7 +45,7 @@ import org.apache.hadoop.util.StringUtils; * */ @Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers") -public class GenericUDAFAverage implements GenericUDAFResolver { +public class GenericUDAFAverage extends AbstractGenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFAverage.class.getName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java index 0ff41f7..e2628e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java @@ -38,7 +38,7 @@ import org.apache.hadoop.util.ReflectionUtils; * This class is a bridge between GenericUDAF and UDAF. Old UDAF can be used * with the GenericUDAF infrastructure through this bridge. 
*/ -public class GenericUDAFBridge implements GenericUDAFResolver { +public class GenericUDAFBridge extends AbstractGenericUDAFResolver { UDAF udaf; @@ -51,7 +51,8 @@ public class GenericUDAFBridge implements GenericUDAFResolver { } @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { Class udafEvaluatorClass = udaf.getResolver() .getEvaluatorClass(Arrays.asList(parameters)); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java index 0054664..f462c73 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -30,35 +30,70 @@ import org.apache.hadoop.io.LongWritable; /** * This class implements the COUNT aggregation function as in SQL. */ -@Description(name = "count", value = "_FUNC_(x) - Returns the count") -public class GenericUDAFCount implements GenericUDAFResolver { +@Description(name = "count", + value = "_FUNC_(*) - Returns the total number of retrieved rows, including " + + "rows containing NULL values.\n" + + + "_FUNC_(expr) - Returns the number of rows for which the supplied " + + "expression is non-NULL.\n" + + + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for " + + "which the supplied expression(s) are unique and non-NULL.") +public class GenericUDAFCount implements GenericUDAFResolver2 { @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 1) { - throw new UDFArgumentTypeException(parameters.length - 1, - "Exactly one argument is expected."); - } + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { + // This method implementation is preserved for backward compatibility. return new GenericUDAFCountEvaluator(); } + @Override + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) + throws SemanticException { + + TypeInfo[] parameters = paramInfo.getParameters(); + + if (parameters.length == 0) { + if (!paramInfo.isAllColumns()) { + throw new UDFArgumentException("Argument expected"); + } + assert !paramInfo.isDistinct() : "DISTINCT not supported with *"; + } else { + if (parameters.length > 1 && !paramInfo.isDistinct()) { + throw new UDFArgumentException("DISTINCT keyword must be specified"); + } + assert !paramInfo.isAllColumns() : "* not supported in expression list"; + } + + return new GenericUDAFCountEvaluator().setCountAllColumns( + paramInfo.isAllColumns()); + } + /** * GenericUDAFCountEvaluator. 
* */ public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator { - private ObjectInspector inputOI; + private boolean countAllColumns = false; + private LongObjectInspector partialCountAggOI; private LongWritable result; @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + public ObjectInspector init(Mode m, ObjectInspector[] parameters) + throws HiveException { super.init(m, parameters); - assert (parameters.length == 1); - inputOI = parameters[0]; + partialCountAggOI = + PrimitiveObjectInspectorFactory.writableLongObjectInspector; result = new LongWritable(0); return PrimitiveObjectInspectorFactory.writableLongObjectInspector; } + private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) { + countAllColumns = countAllCols; + return this; + } + /** class for storing count value. */ static class CountAgg implements AggregationBuffer { long value; @@ -66,9 +101,9 @@ public class GenericUDAFCount implements GenericUDAFResolver { @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { - CountAgg result = new CountAgg(); - reset(result); - return result; + CountAgg buffer = new CountAgg(); + reset(buffer); + return buffer; } @Override @@ -77,17 +112,31 @@ public class GenericUDAFCount implements GenericUDAFResolver { } @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert (parameters.length == 1); - if (parameters[0] != null) { + public void iterate(AggregationBuffer agg, Object[] parameters) + throws HiveException { + if (countAllColumns) { + assert parameters.length == 0; ((CountAgg) agg).value++; + } else { + assert parameters.length > 0; + boolean countThisRow = true; + for (Object nextParam : parameters) { + if (nextParam == null) { + countThisRow = false; + break; + } + } + if (countThisRow) { + ((CountAgg) agg).value++; + } } } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { + public void merge(AggregationBuffer agg, Object partial) + throws HiveException { if (partial != null) { - long p = ((LongObjectInspector) inputOI).get(partial); + long p = partialCountAggOI.get(partial); ((CountAgg) agg).value += p; } } @@ -102,7 +151,5 @@ public class GenericUDAFCount implements GenericUDAFResolver { public Object terminatePartial(AggregationBuffer agg) throws HiveException { return terminate(agg); } - } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java index 1db881a..21a97a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java @@ -52,7 +52,7 @@ import org.apache.hadoop.util.StringUtils; */ @Description(name = "histogram_numeric", value = "_FUNC_(expr, nb) - Computes a histogram on numeric 'expr' using nb bins.") -public class GenericUDAFHistogramNumeric implements GenericUDAFResolver { +public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { // class static variables static final Log LOG = LogFactory.getLog(GenericUDAFHistogramNumeric.class.getName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java index 6785687..b9a295c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java @@ -19,25 +19,24 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @Description(name = "max", value = "_FUNC_(expr) - Returns the maximum value of expr") -public class GenericUDAFMax implements GenericUDAFResolver { +public class GenericUDAFMax extends AbstractGenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFMax.class.getName()); @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) - throws SemanticException { + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java index 051f3a1..107d663 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java @@ -19,25 +19,24 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @Description(name = "min", value = "_FUNC_(expr) - Returns the minimum value of expr") -public class GenericUDAFMin implements GenericUDAFResolver { +public class GenericUDAFMin extends AbstractGenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFMin.class.getName()); @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) - throws SemanticException { + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFParameterInfo.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFParameterInfo.java new file mode 100644 index 0000000..e6823ce --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFParameterInfo.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A callback interface used in conjunction with GenericUDAFResolver2 + * interface that allows for a more extensible and flexible means of + * discovering the parameter types provided for the + * GenericUDAFParameterInfo. + * + */ +public interface GenericUDAFParameterInfo { + + /** + * @return the parameter type list passed into the UDAF. + */ + TypeInfo[] getParameters(); + + /** + * @return true if the UDAF invocation was qualified with DISTINCT + * keyword, false otherwise. + */ + boolean isDistinct(); + + /** + * @return true if the UDAF invocation was done with a wildcard instead of + * explicit parameter list. + */ + boolean isAllColumns(); + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver.java index 9888b52..75f9ca0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver.java @@ -24,28 +24,32 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * A Generic User-defined aggregation function (GenericUDAF) for the use with * Hive. - * + * * GenericUDAFResolver is used at compile time. We use GenericUDAFResolver to * find out the GenericUDAFEvaluator for the parameter types. - * + * + * @deprecated Use {@link GenericUDAFResolver2} instead. */ +@Deprecated public interface GenericUDAFResolver { /** * Get the evaluator for the parameter types. - * + * * The reason that this function returns an object instead of a class is - * because it's possible that the object needs some configuration (that can be - * serialized). In that case the class of the object has to implement the + * because it is possible that the object needs some configuration (that can + * be serialized). In that case the class of the object has to implement the * Serializable interface. At execution time, we will deserialize the object * from the plan and use it to evaluate the aggregations. - * + *
<p>
* If the class of the object does not implement Serializable, then we will * create a new instance of the class at execution time. - * + *
</p>
* @param parameters * The types of the parameters. We need the type information to know * which evaluator class to use. + * @throws SemanticException */ - GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException; + GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver2.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver2.java new file mode 100644 index 0000000..feb8c35 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFResolver2.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * This interface extends the GenericUDAFResolver interface and + * provides more flexibility in terms of discovering the parameter types + * supplied to the UDAF. Implementations that extend this interface will + * also have access to extra information such as the specification of the + * DISTINCT qualifier or the invocation with the special wildcard + * character. + * + */ +@SuppressWarnings("deprecation") +public interface GenericUDAFResolver2 extends GenericUDAFResolver { + + /** + * Get the evaluator for the parameter types. + * + * The reason that this function returns an object instead of a class is + * because it is possible that the object needs some configuration (that can + * be serialized). In that case the class of the object has to implement the + * Serializable interface. At execution time, we will deserialize the object + * from the plan and use it to evaluate the aggregations. + *
<p>
+ * If the class of the object does not implement Serializable, then we will + * create a new instance of the class at execution time. + *
</p>
+ * + * @param info The parameter information that is applicable to the UDAF being + * invoked. + * @throws SemanticException + */ + GenericUDAFEvaluator getEvaluator( + GenericUDAFParameterInfo info) throws SemanticException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index ce97afd..5c4b3bf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -38,12 +38,13 @@ import org.apache.hadoop.util.StringUtils; * */ @Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers") -public class GenericUDAFSum implements GenericUDAFResolver { +public class GenericUDAFSum extends AbstractGenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName()); @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { if (parameters.length != 1) { throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java index 26dc84c..28ddbdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java @@ -47,7 +47,7 @@ import org.apache.hadoop.util.StringUtils; */ @Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers") -public class GenericUDAFVariance implements GenericUDAFResolver { +public class GenericUDAFVariance extends AbstractGenericUDAFResolver { static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/SimpleGenericUDAFParameterInfo.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/SimpleGenericUDAFParameterInfo.java new file mode 100644 index 0000000..3277b98 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/SimpleGenericUDAFParameterInfo.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A simple implementation of GenericUDAFParameterInfo. 
+ * + */ +public class SimpleGenericUDAFParameterInfo implements GenericUDAFParameterInfo +{ + + private final TypeInfo[] parameters; + private final boolean distinct; + private final boolean allColumns; + + public SimpleGenericUDAFParameterInfo(TypeInfo[] params, boolean distinct, + boolean allColumns) { + this.parameters = params; + this.distinct = distinct; + this.allColumns = allColumns; + } + + @Override + public TypeInfo[] getParameters() { + return parameters; + } + + @Override + public boolean isDistinct() { + return distinct; + } + + @Override + public boolean isAllColumns() { + return allColumns; + } +} diff --git ql/src/test/queries/clientpositive/udf_count.q ql/src/test/queries/clientpositive/udf_count.q index 2d1510f..625140c 100644 --- ql/src/test/queries/clientpositive/udf_count.q +++ ql/src/test/queries/clientpositive/udf_count.q @@ -1,2 +1,9 @@ DESCRIBE FUNCTION count; DESCRIBE FUNCTION EXTENDED count; + +SELECT count(key) FROM src; +SELECT count(DISTINCT key) FROM src; +SELECT count(DISTINCT key, value) FROM src; +SELECT count(*) FROM src; +SELECT count(1) FROM src; + diff --git ql/src/test/results/clientpositive/udf_count.q.out ql/src/test/results/clientpositive/udf_count.q.out index 2adffd8..172d51a 100644 --- ql/src/test/results/clientpositive/udf_count.q.out +++ ql/src/test/results/clientpositive/udf_count.q.out @@ -2,9 +2,58 @@ PREHOOK: query: DESCRIBE FUNCTION count PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION count POSTHOOK: type: DESCFUNCTION -count(x) - Returns the count +count(*) - Returns the total number of retrieved rows, including rows containing NULL values. +count(expr) - Returns the number of rows for which the supplied expression is non-NULL. +count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL. PREHOOK: query: DESCRIBE FUNCTION EXTENDED count PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED count POSTHOOK: type: DESCFUNCTION -count(x) - Returns the count +count(*) - Returns the total number of retrieved rows, including rows containing NULL values. +count(expr) - Returns the number of rows for which the supplied expression is non-NULL. +count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL. 
+PREHOOK: query: SELECT count(key) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-03-57_444_8268094848386405053/10000 +POSTHOOK: query: SELECT count(key) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-03-57_444_8268094848386405053/10000 +500 +PREHOOK: query: SELECT count(DISTINCT key) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-03_027_2280178607671255545/10000 +POSTHOOK: query: SELECT count(DISTINCT key) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-03_027_2280178607671255545/10000 +309 +PREHOOK: query: SELECT count(DISTINCT key, value) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-07_470_7969389937698436561/10000 +POSTHOOK: query: SELECT count(DISTINCT key, value) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-07_470_7969389937698436561/10000 +309 +PREHOOK: query: SELECT count(*) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-11_941_5092558765566594685/10000 +POSTHOOK: query: SELECT count(*) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-11_941_5092558765566594685/10000 +500 +PREHOOK: query: SELECT count(1) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-16_880_218108047003625408/10000 +POSTHOOK: query: SELECT count(1) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/Users/arvind/work/src/hive/trunk/build/ql/scratchdir/hive_2010-05-25_15-04-16_880_218108047003625408/10000 +500
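
Reviewer notes (not part of the patch; the sketches below are illustrative only).

Migration path for out-of-tree UDAFs: extending AbstractGenericUDAFResolver keeps the deprecated TypeInfo[] entry point working while exposing the new compile-time information. The class below is hypothetical (its name and its reuse of GenericUDAFCountEvaluator as a stand-in evaluator are not part of this patch); it shows how a resolver can reject the wildcard form and observe DISTINCT.

package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class GenericUDAFExample extends AbstractGenericUDAFResolver {

  @Override
  public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
      throws SemanticException {
    if (info.isAllColumns()) {
      // This example aggregate defines no wildcard semantics, so FUNC(*) is an error.
      throw new UDFArgumentException("* is not supported by this function");
    }
    // info.isDistinct() is equally visible here, so a resolver could select a
    // specialized evaluator for the DISTINCT form.
    return getEvaluator(info.getParameters());
  }

  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
      throws SemanticException {
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Exactly one argument is expected.");
    }
    // Stand-in evaluator borrowed from this patch purely to keep the sketch compilable.
    return new GenericUDAFCount.GenericUDAFCountEvaluator();
  }
}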
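
Compile-time hand-off: FunctionRegistry.getGenericUDAFEvaluator() now wraps the argument types, the DISTINCT flag, and the wildcard flag in a SimpleGenericUDAFParameterInfo and routes it to any resolver that implements GenericUDAFResolver2. A minimal standalone driver for the COUNT(*) case, using only classes added or modified by this patch (the driver class itself is hypothetical):

import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class CountStarResolutionSketch {
  public static void main(String[] args) throws SemanticException {
    // COUNT(*): empty parameter list, not DISTINCT, wildcard flag set.
    GenericUDAFParameterInfo info =
        new SimpleGenericUDAFParameterInfo(new TypeInfo[0], false, true);
    GenericUDAFEvaluator eval = new GenericUDAFCount().getEvaluator(info);
    // The returned evaluator has countAllColumns enabled, so iterate() will
    // increment the counter for every row regardless of NULLs.
    System.out.println(eval.getClass().getSimpleName());
  }
}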
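
NULL semantics: the reworked GenericUDAFCountEvaluator.iterate() counts a row for COUNT(expr1, expr2, ...) only when every argument is non-NULL, which is why count(key) and count(*) both return 500 on src above (key is never NULL). The hypothetical driver below exercises that rule directly against the patched evaluator:

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount.GenericUDAFCountEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class CountNullSemanticsSketch {
  public static void main(String[] args) throws HiveException {
    GenericUDAFEvaluator eval = new GenericUDAFCountEvaluator();
    ObjectInspector[] inputOIs =
        { PrimitiveObjectInspectorFactory.javaStringObjectInspector };
    eval.init(GenericUDAFEvaluator.Mode.COMPLETE, inputOIs);

    AggregationBuffer agg = eval.getNewAggregationBuffer();
    eval.iterate(agg, new Object[] { "a" });   // counted
    eval.iterate(agg, new Object[] { null });  // skipped: NULL argument
    eval.iterate(agg, new Object[] { "b" });   // counted
    System.out.println(eval.terminate(agg));   // prints 2
  }
}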