diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 07b139e1fe..5e88f30cab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -472,6 +472,8 @@ RESOURCE_PLAN_NOT_EXISTS(10418, "Resource plan {0} does not exist", true), INCOMPATIBLE_STRUCT(10419, "Incompatible structs.", true), OBJECTNAME_CONTAINS_DOT(10420, "Table or database name may not contain dot(.) character", true), + WITHIN_GROUP_NOT_ALLOWED(10421, + "Not an ordered-set aggregate function: {0}. WITHIN GROUP clause is not allowed.", true), //========================== 20000 range starts here ========================// diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 1616409256..059919710e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1891,4 +1891,12 @@ public static String invertFuncText(final String funcText) { return null; // helps identify unsupported functions } } + + public static boolean supportsWithinGroup(String functionName) throws SemanticException { + WindowFunctionInfo windowInfo = getWindowFunctionInfo(functionName); + if (windowInfo != null) { + return windowInfo.supportsWithinGroup(); + } + return false; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java index 9f833a037b..511d9641c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java @@ -51,15 +51,38 @@ boolean pivotResult() default false; /** - * Used in translations process to validate arguments + * Used in translations process to validate arguments. 
* @return true if ranking function */ boolean rankingFunction() default false; - /** - * Using in analytical functions to specify that UDF implies an ordering - * @return true if the function implies order - */ - boolean impliesOrder() default false; -} + /** + * Using in analytical functions to specify that UDF implies an ordering. + * @return true if the function implies order + */ + boolean impliesOrder() default false; + /** + * This property specifies whether the UDAF is an Ordered-set aggregate function. + * ::= + * | + * + * + * ::= + * + * + * + * + * ::= RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST + * + * ::= + * + * + * + * + * ::= PERCENTILE_CONT | PERCENTILE_DISC + * + * @return true if the function can be used as an ordered-set aggregate + */ + boolean supportsWithinGroup() default false; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java index 5e86eae6b1..a0b0e48f4c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java @@ -27,6 +27,7 @@ private final boolean supportsWindow; private final boolean pivotResult; private final boolean impliesOrder; + private final boolean supportsWithinGroup; public WindowFunctionInfo(FunctionType functionType, String functionName, GenericUDAFResolver resolver, FunctionResource[] resources) { @@ -36,6 +37,7 @@ public WindowFunctionInfo(FunctionType functionType, String functionName, supportsWindow = def == null ? true : def.supportsWindow(); pivotResult = def == null ? false : def.pivotResult(); impliesOrder = def == null ? false : def.impliesOrder(); + supportsWithinGroup = def == null ? 
false : def.supportsWithinGroup(); } public boolean isSupportsWindow() { @@ -49,4 +51,8 @@ public boolean isPivotResult() { public boolean isImpliesOrder() { return impliesOrder; } + + public boolean supportsWithinGroup() { + return supportsWithinGroup; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index af9fba6f36..f14b38bd75 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -385,6 +385,7 @@ KW_SYNC: 'SYNC'; KW_AST: 'AST'; KW_COST: 'COST'; KW_JOINCOST: 'JOINCOST'; +KW_WITHIN: 'WITHIN'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 6e7ed4d83e..c2e8fc6349 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -442,6 +442,7 @@ TOK_REPLACE; TOK_LIKERP; TOK_UNMANAGED; TOK_INPUTFORMAT; +TOK_WITHIN_GROUP; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 1935d3f3fc..55c6863f67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -224,8 +224,9 @@ function (STAR) => (star=STAR) | (dist=KW_DISTINCT | KW_ALL)? (selectExpression (COMMA selectExpression)*)? ) - RPAREN (KW_OVER ws=window_specification)? + RPAREN ((KW_OVER ws=window_specification) | (within=KW_WITHIN KW_GROUP LPAREN KW_ORDER KW_BY colRef=columnRefOrder RPAREN))? -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?) + -> {$within != null}? ^(TOK_FUNCTION functionName (selectExpression+)? ^(TOK_WITHIN_GROUP $colRef)) -> {$dist == null}? 
^(TOK_FUNCTION functionName (selectExpression+)? $ws?) -> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?) ; @@ -867,6 +868,7 @@ nonReserved | KW_RESOURCE | KW_PLAN | KW_PLANS | KW_QUERY_PARALLELISM | KW_ACTIVATE | KW_MOVE | KW_DO | KW_POOL | KW_ALLOC_FRACTION | KW_SCHEDULING_POLICY | KW_PATH | KW_MAPPING | KW_WORKLOAD | KW_MANAGEMENT | KW_ACTIVE | KW_UNMANAGED | KW_UNKNOWN + | KW_WITHIN ; //The following SQL2011 reserved keywords are used as function name only, but not as identifiers. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8cd4ba0b1c..c4af314196 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -902,8 +902,8 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, || exprTokenType == HiveParser.TOK_FUNCTIONDI || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { assert (expressionTree.getChildCount() != 0); - if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() - == HiveParser.TOK_WINDOWSPEC) { + Tree lastChild = expressionTree.getChild(expressionTree.getChildCount() - 1); + if (lastChild.getType() == HiveParser.TOK_WINDOWSPEC) { // If it is a windowing spec, we include it in the list // Further, we will examine its children AST nodes to check whether // there are aggregation functions within @@ -912,6 +912,8 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, doPhase1GetAllAggregations((ASTNode) child, aggregations, wdwFns, expressionTree); } return; + } else if (lastChild.getType() == HiveParser.TOK_WITHIN_GROUP) { + transformWithinGroup(expressionTree, lastChild); } if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { String functionName = unescapeIdentifier(expressionTree.getChild(0) @@ -943,6 +945,21 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, } } + private void 
transformWithinGroup(ASTNode expressionTree, Tree withinGroupNode) throws SemanticException { + Tree functionNameNode = expressionTree.getChild(0); + if (!FunctionRegistry.supportsWithinGroup(functionNameNode.getText())) { + throw new SemanticException(ErrorMsg.WITHIN_GROUP_NOT_ALLOWED, functionNameNode.getText()); + } + + Tree tabSortColNameNode = withinGroupNode.getChild(0); + ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0); + expressionTree.deleteChild(withinGroupNode.getChildIndex()); + // backward compatibility: the sortkey is the first parameter of the percentile_cont and percentile_disc functions + expressionTree.insertChild(1, sortKey); + expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, + Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); + } + private List doPhase1GetDistinctFuncExprs(Map aggregationTrees) { List exprs = new ArrayList(); for (Map.Entry entry : aggregationTrees.entrySet()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java index 72a19bd386..e7e4fda6ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java @@ -18,9 +18,10 @@ package org.apache.hadoop.hive.ql.udf.generic; +import static org.apache.hadoop.hive.ql.util.DirectionUtils.DESCENDING_CODE; + import java.io.Serializable; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -30,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import 
org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -43,16 +45,24 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.LongWritable; /** * GenericUDAFPercentileCont. */ -@Description(name = "percentile_cont", value = "_FUNC_(input, pc) " - + "- Returns the percentile of expr at pc (range: [0,1]).") +@WindowFunctionDescription( + description = @Description( + name = "percentile_cont", + value = "_FUNC_(input, pc) " + + "- Returns the percentile of expr at pc (range: [0,1])."), + supportsWindow = false, + pivotResult = true, + supportsWithinGroup = true) public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver { private static final Comparator LONG_COMPARATOR; @@ -70,14 +80,8 @@ public int compare(DoubleWritable o1, DoubleWritable o2) { @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 2) { - throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 argument is expected."); - } + validateParameterTypes(parameters); - if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -101,6 +105,20 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws 
SemanticE } } + protected void validateParameterTypes(TypeInfo[] parameters) throws UDFArgumentTypeException { + if (parameters.length < 2) { + throw new UDFArgumentTypeException(parameters.length - 1, "Not enough arguments."); + } + if (parameters.length > 3) { + throw new UDFArgumentTypeException(parameters.length - 1, "Too many arguments."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + } + /** * A comparator to sort the entries in order - Long. */ @@ -137,12 +155,17 @@ public int compare(Map.Entry o1, public abstract static class PercentileContEvaluator extends GenericUDAFEvaluator { PercentileCalculator calc = getCalculator(); + protected PercentileContEvaluator(Comparator> comparator) { + this.comparator = comparator; + } + /** * A state class to store intermediate aggregation results. */ public class PercentileAgg extends AbstractAggregationBuffer { Map counts; List percentiles; + boolean isAscending; } // For PARTIAL1 and COMPLETE @@ -160,6 +183,10 @@ public int compare(Map.Entry o1, protected transient StructObjectInspector soi; protected transient StructField countsField; protected transient StructField percentilesField; + protected transient StructField isAscendingField; + + private final transient Comparator> comparator; + protected transient boolean isAscending; public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); @@ -167,13 +194,14 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc initInspectors(parameters); if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// ...for partial result - partialResult = new Object[2]; + partialResult = new Object[3]; ArrayList foi = getPartialInspectors(); ArrayList fname = new ArrayList(); fname.add("counts"); fname.add("percentiles"); + 
fname.add("isAscending"); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); } else { // ...for final result @@ -192,16 +220,25 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc protected abstract U copyInput(U input); - protected abstract void sortEntries(List> entriesList); + private void sortEntries(List> entriesList, boolean isAscending) { + entriesList.sort(isAscending ? comparator : comparator.reversed()); + } protected void initInspectors(ObjectInspector[] parameters) { if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data inputOI = (PrimitiveObjectInspector) parameters[0]; + if (parameters.length == 2) { // Order direction was not given, default to asc + isAscending = true; + } else { + isAscending = ((WritableConstantIntObjectInspector) parameters[2]). + getWritableConstantValue().get() != DESCENDING_CODE; + } } else { // ...for partial result as input soi = (StructObjectInspector) parameters[0]; countsField = soi.getStructFieldRef("counts"); percentilesField = soi.getStructFieldRef("percentiles"); + isAscendingField = soi.getStructFieldRef("isAscending"); countsOI = (MapObjectInspector) countsField.getFieldObjectInspector(); percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector(); @@ -211,6 +248,7 @@ protected void initInspectors(ObjectInspector[] parameters) { @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { PercentileAgg agg = new PercentileAgg(); + agg.isAscending = isAscending; return agg; } @@ -264,6 +302,7 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { Object objCounts = soi.getStructFieldData(partial, countsField); Object objPercentiles = soi.getStructFieldData(partial, percentilesField); + Object objIsAscending = soi.getStructFieldData(partial, isAscendingField); Map counts = (Map) countsOI.getMap(objCounts); List percentiles = @@ -278,6 +317,7 @@ public void 
merge(AggregationBuffer agg, Object partial) throws HiveException { if (percAgg.percentiles == null) { percAgg.percentiles = new ArrayList(percentiles); } + percAgg.isAscending = ((BooleanWritable)objIsAscending).get(); for (Map.Entry e : counts.entrySet()) { increment(percAgg, e.getKey(), e.getValue().get()); @@ -297,7 +337,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { Set> entries = percAgg.counts.entrySet(); List> entriesList = new ArrayList>(entries); - sortEntries(entriesList); + sortEntries(entriesList, percAgg.isAscending); // Accumulate the counts. long total = getTotal(entriesList); @@ -317,6 +357,7 @@ public Object terminatePartial(AggregationBuffer agg) throws HiveException { PercentileAgg percAgg = (PercentileAgg) agg; partialResult[0] = percAgg.counts; partialResult[1] = percAgg.percentiles; + partialResult[2] = new BooleanWritable(percAgg.isAscending); return partialResult; } @@ -353,6 +394,10 @@ protected void calculatePercentile(PercentileAgg percAgg, public static class PercentileContLongEvaluator extends PercentileContEvaluator { + public PercentileContLongEvaluator() { + super(new LongComparator()); + } + protected ArrayList getPartialInspectors() { ArrayList foi = new ArrayList(); @@ -361,6 +406,7 @@ protected void calculatePercentile(PercentileAgg percAgg, PrimitiveObjectInspectorFactory.writableLongObjectInspector)); foi.add(ObjectInspectorFactory.getStandardListObjectInspector( PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + foi.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); return foi; } @@ -376,10 +422,6 @@ protected LongWritable copyInput(LongWritable input) { return new LongWritable(input.get()); } - protected void sortEntries(List> entriesList) { - Collections.sort(entriesList, new LongComparator()); - } - @Override protected PercentileCalculator getCalculator() { return new PercentileContLongCalculator(); @@ -391,6 +433,10 @@ protected void sortEntries(List> 
entriesList) */ public static class PercentileContDoubleEvaluator extends PercentileContEvaluator { + public PercentileContDoubleEvaluator() { + super(new DoubleComparator()); + } + @Override protected ArrayList getPartialInspectors() { ArrayList foi = new ArrayList(); @@ -400,6 +446,7 @@ protected void sortEntries(List> entriesList) PrimitiveObjectInspectorFactory.writableLongObjectInspector)); foi.add(ObjectInspectorFactory.getStandardListObjectInspector( PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + foi.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); return foi; } @@ -417,10 +464,6 @@ protected DoubleWritable copyInput(DoubleWritable input) { return new DoubleWritable(input.get()); } - protected void sortEntries(List> entriesList) { - Collections.sort(entriesList, new DoubleComparator()); - } - @Override protected PercentileCalculator getCalculator() { return new PercentileContDoubleCalculator(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java index 3ac336e68a..d7c295cb11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java @@ -23,9 +23,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; @@ -33,20 +33,20 @@ /** * GenericUDAFPercentileDisc. 
*/ -@Description(name = "percentile_disc", value = "_FUNC_(input, pc) - " - + "Returns the percentile of expr at pc (range: [0,1]) without interpolation.") +@WindowFunctionDescription( + description = @Description( + name = "percentile_disc", + value = "_FUNC_(input, pc) - " + + "Returns the percentile of expr at pc (range: [0,1]) without interpolation."), + supportsWindow = false, + pivotResult = true, + supportsWithinGroup = true) public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont { @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 2) { - throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 argument is expected."); - } + validateParameterTypes(parameters); - if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { case BYTE: case SHORT: diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java new file mode 100644 index 0000000000..9d44ed87e9 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +/** + * Test cases for parse WITHIN GROUP clause syntax. + * function(expression) WITHIN GROUP (ORDER BY sort_expression) + */ +public class TestParseWithinGroupClause { + ParseDriver parseDriver = new ParseDriver(); + + @Test + public void testParsePercentileCont() throws Exception { + ASTNode tree = parseDriver.parseSelect("SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val) FROM src", null); + + assertEquals(1, tree.getChildCount()); + ASTNode selExprNode = (ASTNode) tree.getChild(0); + assertEquals(1, selExprNode.getChildCount()); + ASTNode functionNode = (ASTNode) selExprNode.getChild(0); + assertEquals(HiveParser.TOK_FUNCTION, functionNode.getType()); + assertEquals(3, functionNode.getChildCount()); + + ASTNode functionNameNode = (ASTNode) functionNode.getChild(0); + assertEquals(HiveParser.Identifier, functionNameNode.getType()); + assertEquals("percentile_cont", functionNameNode.getText()); + + ASTNode fractionNode = (ASTNode) functionNode.getChild(1); + assertEquals(HiveParser.Number, fractionNode.getType()); + assertEquals("0.4", fractionNode.getText()); + + ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); + assertEquals(HiveParser.TOK_WITHIN_GROUP, withinGroupNode.getType()); + ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); + assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType()); + } + + @Test + public void testParsePercentileContAsc() throws Exception { + 
ASTNode tree = parseDriver.parseSelect( + "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val ASC) FROM src", null); + ASTNode selExprNode = (ASTNode) tree.getChild(0); + ASTNode functionNode = (ASTNode) selExprNode.getChild(0); + ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); + ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); + assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType()); + } + + @Test + public void testParsePercentileContDesc() throws Exception { + ASTNode tree = parseDriver.parseSelect( + "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val DESC) FROM src", null); + ASTNode selExpr = (ASTNode) tree.getChild(0); + ASTNode function = (ASTNode) selExpr.getChild(0); + ASTNode selExprNode = (ASTNode) tree.getChild(0); + ASTNode functionNode = (ASTNode) selExprNode.getChild(0); + ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); + ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); + assertEquals(HiveParser.TOK_TABSORTCOLNAMEDESC, tabSortColNameNode.getType()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java index 1a7aaca08d..c64e27426d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java @@ -149,6 +149,7 @@ private void checkPercentile(Long[] items, double percentile, double expected) t agg.percentiles = new ArrayList(); agg.percentiles.add(new DoubleWritable(percentile)); + agg.isAscending = true; for (int i = 0; i < items.length; i++) { eval.increment(agg, new LongWritable(items[i]), 1); @@ -168,6 +169,7 @@ private void checkPercentile(Double[] items, double percentile, double expected) agg.percentiles = new ArrayList(); agg.percentiles.add(new DoubleWritable(percentile)); + agg.isAscending = true; 
for (int i = 0; i < items.length; i++) { eval.increment(agg, new DoubleWritable(items[i]), 1); diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java index 1a1b3b0841..390d51b7b5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java @@ -148,6 +148,7 @@ private void checkPercentile(Long[] items, double percentile, double expected) t agg.percentiles = new ArrayList(); agg.percentiles.add(new DoubleWritable(percentile)); + agg.isAscending = true; for (int i = 0; i < items.length; i++) { eval.increment(agg, new LongWritable(items[i]), 1); @@ -169,6 +170,7 @@ private void checkPercentile(Double[] items, double percentile, double expected) agg.percentiles = new ArrayList(); agg.percentiles.add(new DoubleWritable(percentile)); + agg.isAscending = true; for (int i = 0; i < items.length; i++) { eval.increment(agg, new DoubleWritable(items[i]), 1); diff --git ql/src/test/queries/clientpositive/udaf_percentile_cont.q ql/src/test/queries/clientpositive/udaf_percentile_cont.q index 6d788c1fc9..7a1ac5c968 100644 --- ql/src/test/queries/clientpositive/udaf_percentile_cont.q +++ ql/src/test/queries/clientpositive/udaf_percentile_cont.q @@ -11,6 +11,7 @@ set hive.groupby.skewindata = false; SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -22,6 +23,7 @@ set hive.groupby.skewindata = false; SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN 
GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -34,6 +36,7 @@ set hive.groupby.skewindata = true; SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -45,6 +48,7 @@ set hive.groupby.skewindata = true; SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -55,15 +59,52 @@ set hive.groupby.skewindata = false; -- test null handling SELECT CAST(key AS INT) DIV 10, - percentile_cont(NULL, 0.0) + percentile_cont(NULL, 0.0), + percentile_cont(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10; -- test empty array handling SELECT CAST(key AS INT) DIV 10, - percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10; -select percentile_cont(cast(key as bigint), 0.5) from src where false; +select percentile_cont(cast(key as bigint), 0.5), + percentile_cont(0.5) within group (order by cast(key as bigint)) +from src where false; + +CREATE TABLE t_test (value int); +INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), (20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL); + +EXPLAIN SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), 
+percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test; + +SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test; + +DROP TABLE t_test; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/udaf_percentile_disc.q ql/src/test/queries/clientpositive/udaf_percentile_disc.q index 7ba703eecc..6d93b3418a 100644 --- ql/src/test/queries/clientpositive/udaf_percentile_disc.q +++ ql/src/test/queries/clientpositive/udaf_percentile_disc.q @@ -11,6 +11,7 @@ set hive.groupby.skewindata = false; SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -22,6 +23,7 @@ set 
hive.groupby.skewindata = false; SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -34,6 +36,7 @@ set hive.groupby.skewindata = true; SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -45,6 +48,7 @@ set hive.groupby.skewindata = true; SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10; @@ -55,15 +59,53 @@ set hive.groupby.skewindata = false; -- test null handling SELECT CAST(key AS INT) DIV 10, - percentile_disc(NULL, 0.0) + percentile_disc(NULL, 0.0), + percentile_disc(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10; -- test empty array handling SELECT CAST(key AS INT) DIV 10, - percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10; -select percentile_disc(cast(key as bigint), 0.5) from src where false; +select percentile_disc(cast(key as bigint), 0.5), + percentile_disc(0.5) within group (order by cast(key as bigint)) +from src where false; + + +CREATE TABLE t_test (value int); +INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), 
(20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL); + +EXPLAIN SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test; + +SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test; + +DROP TABLE t_test; \ No newline at end of file diff --git ql/src/test/results/clientpositive/udaf_percentile_cont.q.out ql/src/test/results/clientpositive/udaf_percentile_cont.q.out index dda6ce55c2..a2dae4a06e 100644 --- ql/src/test/results/clientpositive/udaf_percentile_cont.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_cont.q.out @@ -2,17 +2,18 @@ PREHOOK: query: DESCRIBE FUNCTION percentile_cont PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION percentile_cont POSTHOOK: type: DESCFUNCTION 
-percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +There is no documentation for function 'percentile_cont' PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont POSTHOOK: type: DESCFUNCTION -percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +There is no documentation for function 'percentile_cont' Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont Function type:BUILTIN PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -22,65 +23,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 
353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 4.5 4.5 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 117.0 117.0 119.0 +12 120.0 127.0 127.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 154.0 154.0 158.0 +16 160.0 166.5 166.5 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 186.5 186.5 189.0 +19 190.0 194.5 194.5 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 216.5 216.5 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 234.0 234.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 264.0 264.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 283.5 283.5 289.0 +29 291.0 297.0 297.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 324.0 324.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 376.0 376.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 42.5 42.5 47.0 +40 400.0 403.5 403.5 409.0 +41 411.0 415.5 415.5 419.0 +42 421.0 425.5 425.5 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 467.5 467.5 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 494.5 494.5 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 66.5 66.5 69.0 +7 70.0 73.0 73.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) 
AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -90,65 +93,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 4.5 4.5 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 117.0 117.0 119.0 +12 120.0 127.0 127.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 154.0 154.0 158.0 +16 160.0 166.5 166.5 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 186.5 186.5 189.0 +19 190.0 194.5 194.5 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 
205.0 205.0 209.0 +21 213.0 216.5 216.5 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 234.0 234.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 264.0 264.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 283.5 283.5 289.0 +29 291.0 297.0 297.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 324.0 324.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 376.0 376.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 42.5 42.5 47.0 +40 400.0 403.5 403.5 409.0 +41 411.0 415.5 415.5 419.0 +42 421.0 425.5 425.5 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 467.5 467.5 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 494.5 494.5 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 66.5 66.5 69.0 +7 70.0 73.0 73.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -158,65 +163,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 
175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 4.5 4.5 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 117.0 117.0 119.0 +12 120.0 127.0 127.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 154.0 154.0 158.0 +16 160.0 166.5 166.5 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 186.5 186.5 189.0 +19 190.0 194.5 194.5 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 216.5 216.5 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 234.0 234.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 264.0 264.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 283.5 283.5 289.0 +29 291.0 297.0 297.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 324.0 324.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 376.0 376.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 42.5 42.5 47.0 +40 400.0 403.5 403.5 409.0 +41 411.0 415.5 415.5 419.0 +42 421.0 425.5 425.5 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 467.5 
467.5 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 494.5 494.5 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 66.5 66.5 69.0 +7 70.0 73.0 73.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -226,196 +233,344 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_cont(CAST(substr(value, 5) AS INT), 0.0), percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS DOUBLE)), percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 4.5 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 117.0 119.0 -12 120.0 127.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 154.0 158.0 -16 160.0 166.5 169.0 -17 170.0 175.0 179.0 -18 180.0 186.5 189.0 -19 190.0 194.5 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 216.5 219.0 -22 221.0 224.0 229.0 -23 230.0 234.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 264.0 266.0 -27 272.0 275.0 278.0 -28 280.0 283.5 289.0 -29 291.0 297.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 324.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 376.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 42.5 47.0 -40 400.0 403.5 409.0 -41 411.0 415.5 419.0 -42 421.0 425.5 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 467.5 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 494.5 498.0 -5 51.0 
54.0 58.0 -6 64.0 66.5 69.0 -7 70.0 73.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 4.5 4.5 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 117.0 117.0 119.0 +12 120.0 127.0 127.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 154.0 154.0 158.0 +16 160.0 166.5 166.5 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 186.5 186.5 189.0 +19 190.0 194.5 194.5 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 216.5 216.5 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 234.0 234.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 264.0 264.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 283.5 283.5 289.0 +29 291.0 297.0 297.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 324.0 324.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 376.0 376.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 42.5 42.5 47.0 +40 400.0 403.5 403.5 409.0 +41 411.0 415.5 415.5 419.0 +42 421.0 425.5 425.5 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 467.5 467.5 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 494.5 494.5 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 66.5 66.5 69.0 +7 70.0 73.0 73.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(NULL, 0.0) + percentile_cont(NULL, 0.0), + percentile_cont(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(NULL, 0.0) + percentile_cont(NULL, 0.0), + percentile_cont(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here 
#### -0 NULL -1 NULL -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 NULL -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 NULL -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 NULL -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL +0 NULL NULL +1 NULL NULL +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 NULL NULL +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 NULL NULL +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 NULL NULL +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_cont(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 1.0 -1 1.0 -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 
NULL -17 NULL -18 NULL -19 NULL -2 1.0 -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 1.0 -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 1.0 -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +0 1.0 1.0 +1 1.0 1.0 +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 1.0 1.0 +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 1.0 1.0 +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 1.0 1.0 +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL +PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5), + percentile_cont(0.5) within group (order by cast(key as bigint)) +from src where false PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5), + percentile_cont(0.5) within group (order by cast(key as bigint)) +from src where false POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL +NULL NULL +PREHOOK: query: CREATE TABLE t_test (value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test (value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@t_test +PREHOOK: query: INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), (20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), (20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.value SCRIPT [] +PREHOOK: query: EXPLAIN SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), 
+percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: percentile_cont(_col0, 0), percentile_cont(_col0, 0.2), percentile_cont(_col0, 0.2, 1), percentile_cont(_col0, 0.2, 0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<counts:map<double,bigint>,percentiles:array<double>,isascending:boolean>), _col1 (type: struct<counts:map<double,bigint>,percentiles:array<double>,isascending:boolean>), _col2 (type: struct<counts:map<double,bigint>,percentiles:array<double>,isascending:boolean>), _col3 (type: struct<counts:map<double,bigint>,percentiles:array<double>,isascending:boolean>) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_cont(VALUE._col0), percentile_cont(VALUE._col1), percentile_cont(VALUE._col2), percentile_cont(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2,
_col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT +percentile_cont(value, 0.0), +percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_cont(value, 0.2), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_cont(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_cont(0.2) 
WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +3.0 7.0 7.0 7.0 7.0 true 7.0 true 14.2 14.2 14.2 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git ql/src/test/results/clientpositive/udaf_percentile_disc.q.out ql/src/test/results/clientpositive/udaf_percentile_disc.q.out index 75fa36cc36..3ef0cf9874 100644 --- ql/src/test/results/clientpositive/udaf_percentile_disc.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_disc.q.out @@ -2,17 +2,18 @@ PREHOOK: query: DESCRIBE FUNCTION percentile_disc PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION percentile_disc POSTHOOK: type: DESCFUNCTION -percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. +There is no documentation for function 'percentile_disc' PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc POSTHOOK: type: DESCFUNCTION -percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. 
+There is no documentation for function 'percentile_disc' Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc Function type:BUILTIN PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -22,65 +23,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 5.0 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 118.0 119.0 -12 120.0 128.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 155.0 158.0 -16 160.0 167.0 169.0 -17 170.0 175.0 179.0 -18 180.0 187.0 189.0 -19 190.0 195.0 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 217.0 219.0 -22 221.0 224.0 229.0 -23 230.0 235.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 265.0 266.0 -27 272.0 275.0 278.0 -28 280.0 284.0 289.0 -29 291.0 298.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 325.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 377.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 43.0 47.0 -40 400.0 404.0 409.0 -41 411.0 417.0 419.0 -42 421.0 427.0 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 468.0 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 495.0 498.0 -5 51.0 54.0 58.0 -6 64.0 67.0 69.0 -7 70.0 74.0 78.0 -8 80.0 84.0 87.0 
-9 90.0 95.0 98.0 +0 0.0 5.0 5.0 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 118.0 118.0 119.0 +12 120.0 128.0 128.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 155.0 155.0 158.0 +16 160.0 167.0 167.0 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 187.0 187.0 189.0 +19 190.0 195.0 195.0 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 217.0 217.0 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 235.0 235.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 265.0 265.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 284.0 284.0 289.0 +29 291.0 298.0 298.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 325.0 325.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 377.0 377.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 43.0 43.0 47.0 +40 400.0 404.0 404.0 409.0 +41 411.0 417.0 417.0 419.0 +42 421.0 427.0 427.0 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 468.0 468.0 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 495.0 495.0 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 67.0 67.0 69.0 +7 70.0 74.0 74.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -90,65 +93,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), 
percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 5.0 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 118.0 119.0 -12 120.0 128.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 155.0 158.0 -16 160.0 167.0 169.0 -17 170.0 175.0 179.0 -18 180.0 187.0 189.0 -19 190.0 195.0 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 217.0 219.0 -22 221.0 224.0 229.0 -23 230.0 235.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 265.0 266.0 -27 272.0 275.0 278.0 -28 280.0 284.0 289.0 -29 291.0 298.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 325.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 377.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 43.0 47.0 -40 400.0 404.0 409.0 -41 411.0 417.0 419.0 -42 421.0 427.0 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 468.0 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 495.0 498.0 -5 51.0 54.0 58.0 -6 64.0 67.0 69.0 -7 70.0 74.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 5.0 5.0 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 118.0 118.0 119.0 +12 120.0 128.0 128.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 155.0 155.0 158.0 +16 160.0 167.0 167.0 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 187.0 187.0 189.0 +19 190.0 195.0 195.0 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 217.0 217.0 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 235.0 235.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 265.0 265.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 284.0 284.0 289.0 +29 291.0 298.0 298.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 325.0 325.0 327.0 +33 331.0 333.0 333.0 
339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 377.0 377.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 43.0 43.0 47.0 +40 400.0 404.0 404.0 409.0 +41 411.0 417.0 417.0 419.0 +42 421.0 427.0 427.0 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 468.0 468.0 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 495.0 495.0 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 67.0 67.0 69.0 +7 70.0 74.0 74.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -158,65 +163,67 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 5.0 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 118.0 119.0 -12 120.0 128.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 155.0 158.0 -16 160.0 167.0 169.0 -17 170.0 175.0 179.0 -18 180.0 187.0 189.0 -19 190.0 195.0 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 217.0 219.0 -22 221.0 224.0 229.0 -23 230.0 235.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 265.0 266.0 -27 272.0 275.0 278.0 -28 280.0 284.0 289.0 -29 291.0 298.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 325.0 327.0 -33 331.0 333.0 339.0 -34 
341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 377.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 43.0 47.0 -40 400.0 404.0 409.0 -41 411.0 417.0 419.0 -42 421.0 427.0 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 468.0 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 495.0 498.0 -5 51.0 54.0 58.0 -6 64.0 67.0 69.0 -7 70.0 74.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 5.0 5.0 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 118.0 118.0 119.0 +12 120.0 128.0 128.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 155.0 155.0 158.0 +16 160.0 167.0 167.0 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 187.0 187.0 189.0 +19 190.0 195.0 195.0 199.0 +2 20.0 26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 217.0 217.0 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 235.0 235.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 265.0 265.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 284.0 284.0 289.0 +29 291.0 298.0 298.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 325.0 325.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 377.0 377.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 43.0 43.0 47.0 +40 400.0 404.0 404.0 409.0 +41 411.0 417.0 417.0 419.0 +42 421.0 427.0 427.0 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 468.0 468.0 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 495.0 495.0 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 67.0 67.0 69.0 +7 70.0 74.0 74.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP 
(ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 @@ -226,196 +233,344 @@ PREHOOK: Input: default@src POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile_disc(CAST(substr(value, 5) AS INT), 0.0), percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY CAST(substr(value, 5) AS INT)), percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 0.0 5.0 9.0 -1 10.0 15.0 19.0 -10 100.0 103.0 105.0 -11 111.0 118.0 119.0 -12 120.0 128.0 129.0 -13 131.0 137.0 138.0 -14 143.0 146.0 149.0 -15 150.0 155.0 158.0 -16 160.0 167.0 169.0 -17 170.0 175.0 179.0 -18 180.0 187.0 189.0 -19 190.0 195.0 199.0 -2 20.0 26.0 28.0 -20 200.0 205.0 209.0 -21 213.0 217.0 219.0 -22 221.0 224.0 229.0 -23 230.0 235.0 239.0 -24 241.0 244.0 249.0 -25 252.0 256.0 258.0 -26 260.0 265.0 266.0 -27 272.0 275.0 278.0 -28 280.0 284.0 289.0 -29 291.0 298.0 298.0 -3 30.0 35.0 37.0 -30 302.0 307.0 309.0 -31 310.0 316.0 318.0 -32 321.0 325.0 327.0 -33 331.0 333.0 339.0 -34 341.0 345.0 348.0 -35 351.0 353.0 356.0 -36 360.0 367.0 369.0 -37 373.0 377.0 379.0 -38 382.0 384.0 389.0 -39 392.0 396.0 399.0 -4 41.0 43.0 47.0 -40 400.0 404.0 409.0 -41 411.0 417.0 419.0 -42 421.0 427.0 429.0 -43 430.0 435.0 439.0 -44 443.0 446.0 449.0 -45 452.0 455.0 459.0 -46 460.0 468.0 469.0 -47 470.0 477.0 479.0 -48 480.0 484.0 489.0 -49 490.0 495.0 498.0 -5 51.0 54.0 58.0 -6 64.0 67.0 69.0 -7 70.0 74.0 78.0 -8 80.0 84.0 87.0 -9 90.0 95.0 98.0 +0 0.0 5.0 5.0 9.0 +1 10.0 15.0 15.0 19.0 +10 100.0 103.0 103.0 105.0 +11 111.0 118.0 118.0 119.0 +12 120.0 128.0 128.0 129.0 +13 131.0 137.0 137.0 138.0 +14 143.0 146.0 146.0 149.0 +15 150.0 155.0 155.0 158.0 +16 160.0 167.0 167.0 169.0 +17 170.0 175.0 175.0 179.0 +18 180.0 187.0 187.0 189.0 +19 190.0 195.0 195.0 199.0 +2 20.0 
26.0 26.0 28.0 +20 200.0 205.0 205.0 209.0 +21 213.0 217.0 217.0 219.0 +22 221.0 224.0 224.0 229.0 +23 230.0 235.0 235.0 239.0 +24 241.0 244.0 244.0 249.0 +25 252.0 256.0 256.0 258.0 +26 260.0 265.0 265.0 266.0 +27 272.0 275.0 275.0 278.0 +28 280.0 284.0 284.0 289.0 +29 291.0 298.0 298.0 298.0 +3 30.0 35.0 35.0 37.0 +30 302.0 307.0 307.0 309.0 +31 310.0 316.0 316.0 318.0 +32 321.0 325.0 325.0 327.0 +33 331.0 333.0 333.0 339.0 +34 341.0 345.0 345.0 348.0 +35 351.0 353.0 353.0 356.0 +36 360.0 367.0 367.0 369.0 +37 373.0 377.0 377.0 379.0 +38 382.0 384.0 384.0 389.0 +39 392.0 396.0 396.0 399.0 +4 41.0 43.0 43.0 47.0 +40 400.0 404.0 404.0 409.0 +41 411.0 417.0 417.0 419.0 +42 421.0 427.0 427.0 429.0 +43 430.0 435.0 435.0 439.0 +44 443.0 446.0 446.0 449.0 +45 452.0 455.0 455.0 459.0 +46 460.0 468.0 468.0 469.0 +47 470.0 477.0 477.0 479.0 +48 480.0 484.0 484.0 489.0 +49 490.0 495.0 495.0 498.0 +5 51.0 54.0 54.0 58.0 +6 64.0 67.0 67.0 69.0 +7 70.0 74.0 74.0 78.0 +8 80.0 84.0 84.0 87.0 +9 90.0 95.0 95.0 98.0 PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(NULL, 0.0) + percentile_disc(NULL, 0.0), + percentile_disc(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(NULL, 0.0) + percentile_disc(NULL, 0.0), + percentile_disc(0.0) WITHIN GROUP (ORDER BY NULL) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 NULL -1 NULL -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 NULL -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 NULL -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 NULL -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL 
-6 NULL -7 NULL -8 NULL -9 NULL +0 NULL NULL +1 NULL NULL +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 NULL NULL +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 NULL NULL +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 NULL NULL +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL PREHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, - percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile_disc(0.5) WITHIN GROUP (ORDER BY IF(CAST(key AS INT) DIV 10 < 5, 1, NULL)) FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 1.0 -1 1.0 -10 NULL -11 NULL -12 NULL -13 NULL -14 NULL -15 NULL -16 NULL -17 NULL -18 NULL -19 NULL -2 1.0 -20 NULL -21 NULL -22 NULL -23 NULL -24 NULL -25 NULL -26 NULL -27 NULL -28 NULL -29 NULL -3 1.0 -30 NULL -31 NULL -32 NULL -33 NULL -34 NULL -35 NULL -36 NULL -37 NULL -38 NULL -39 NULL -4 1.0 -40 NULL -41 NULL -42 NULL -43 NULL -44 NULL -45 NULL -46 NULL -47 NULL -48 NULL -49 NULL -5 NULL -6 NULL -7 NULL -8 NULL -9 NULL -PREHOOK: query: select percentile_disc(cast(key as 
bigint), 0.5) from src where false +0 1.0 1.0 +1 1.0 1.0 +10 NULL NULL +11 NULL NULL +12 NULL NULL +13 NULL NULL +14 NULL NULL +15 NULL NULL +16 NULL NULL +17 NULL NULL +18 NULL NULL +19 NULL NULL +2 1.0 1.0 +20 NULL NULL +21 NULL NULL +22 NULL NULL +23 NULL NULL +24 NULL NULL +25 NULL NULL +26 NULL NULL +27 NULL NULL +28 NULL NULL +29 NULL NULL +3 1.0 1.0 +30 NULL NULL +31 NULL NULL +32 NULL NULL +33 NULL NULL +34 NULL NULL +35 NULL NULL +36 NULL NULL +37 NULL NULL +38 NULL NULL +39 NULL NULL +4 1.0 1.0 +40 NULL NULL +41 NULL NULL +42 NULL NULL +43 NULL NULL +44 NULL NULL +45 NULL NULL +46 NULL NULL +47 NULL NULL +48 NULL NULL +49 NULL NULL +5 NULL NULL +6 NULL NULL +7 NULL NULL +8 NULL NULL +9 NULL NULL +PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5), + percentile_disc(0.5) within group (order by cast(key as bigint)) +from src where false PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5), + percentile_disc(0.5) within group (order by cast(key as bigint)) +from src where false POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL +NULL NULL +PREHOOK: query: CREATE TABLE t_test (value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test (value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), (20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES (NULL), (3), (8), (13), (7), (6), (20), (NULL), (NULL), (10), (7), (15), (16), (8), (7), (8), (NULL) +POSTHOOK: 
type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.value SCRIPT [] +PREHOOK: query: EXPLAIN SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: value (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: percentile_disc(_col0, 0), percentile_disc(_col0, 0.2), percentile_disc(_col0, 0.2, 1), percentile_disc(_col0, 0.2, 0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct,percentiles:array,isascending:boolean>), _col1 (type: struct,percentiles:array,isascending:boolean>), _col2 (type: struct,percentiles:array,isascending:boolean>), _col3 (type: struct,percentiles:array,isascending:boolean>) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_disc(VALUE._col0), percentile_disc(VALUE._col1), percentile_disc(VALUE._col2), percentile_disc(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT +percentile_disc(value, 0.0), +percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value NULLS LAST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value ASC) = percentile_disc(value, 0.2), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS FIRST), +percentile_disc(0.2) WITHIN GROUP (ORDER BY value DESC NULLS LAST) +FROM t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +3.0 7.0 7.0 7.0 7.0 true 7.0 true 13.0 13.0 13.0 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test 
+POSTHOOK: Output: default@t_test