diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 5e88f30cab..467ce50e6f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -474,6 +474,8 @@ OBJECTNAME_CONTAINS_DOT(10420, "Table or database name may not contain dot(.) character", true), WITHIN_GROUP_NOT_ALLOWED(10421, "Not an ordered-set aggregate function: {0}. WITHIN GROUP clause is not allowed.", true), + WITHIN_GROUP_PARAMETER_MISMATCH(10422, + "The number of hypothetical direct arguments ({0}) must match the number of ordering columns ({1})", true), //========================== 20000 range starts here ========================// diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 059919710e..3e4364612b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1892,10 +1892,10 @@ public static String invertFuncText(final String funcText) { } } - public static boolean supportsWithinGroup(String functionName) throws SemanticException { + public static boolean isOrderedAggregate(String functionName) throws SemanticException { WindowFunctionInfo windowInfo = getWindowFunctionInfo(functionName); if (windowInfo != null) { - return windowInfo.supportsWithinGroup(); + return windowInfo.isOrderedAggregate(); } return false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java index 48645dc3f2..015c26a30e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java @@ -83,5 +83,5 @@ * * @return true if the function can be used as an ordered-set aggregate */ - boolean supportsWithinGroup() 
default false; + boolean orderedAggregate() default false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java index a0b0e48f4c..fb1a7bde5a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java @@ -27,7 +27,7 @@ private final boolean supportsWindow; private final boolean pivotResult; private final boolean impliesOrder; - private final boolean supportsWithinGroup; + private final boolean orderedAggregate; public WindowFunctionInfo(FunctionType functionType, String functionName, GenericUDAFResolver resolver, FunctionResource[] resources) { @@ -37,7 +37,7 @@ public WindowFunctionInfo(FunctionType functionType, String functionName, supportsWindow = def == null ? true : def.supportsWindow(); pivotResult = def == null ? false : def.pivotResult(); impliesOrder = def == null ? false : def.impliesOrder(); - supportsWithinGroup = def == null ? false : def.supportsWithinGroup(); + orderedAggregate = def == null ? false : def.orderedAggregate(); } public boolean isSupportsWindow() { @@ -52,7 +52,7 @@ public boolean isImpliesOrder() { return impliesOrder; } - public boolean supportsWithinGroup() { - return supportsWithinGroup; + public boolean isOrderedAggregate() { + return orderedAggregate; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 55c6863f67..6639695823 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -224,9 +224,9 @@ function (STAR) => (star=STAR) | (dist=KW_DISTINCT | KW_ALL)? (selectExpression (COMMA selectExpression)*)? ) - RPAREN ((KW_OVER ws=window_specification) | (within=KW_WITHIN KW_GROUP LPAREN KW_ORDER KW_BY colRef=columnRefOrder RPAREN))? 
+ RPAREN ((KW_OVER ws=window_specification) | (within=KW_WITHIN KW_GROUP LPAREN ordBy=orderByClause RPAREN))? -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?) - -> {$within != null}? ^(TOK_FUNCTION functionName (selectExpression+)? ^(TOK_WITHIN_GROUP $colRef)) + -> {$within != null}? ^(TOK_FUNCTION functionName (selectExpression+)? ^(TOK_WITHIN_GROUP $ordBy)) -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?) -> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?) ; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 0198c0f724..11e35d0de4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -255,6 +255,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline; import org.apache.hadoop.hive.ql.util.DirectionUtils; +import org.apache.hadoop.hive.ql.util.NullOrdering; import org.apache.hadoop.hive.ql.util.ResourceDownloader; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; @@ -948,17 +949,35 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, private void transformWithinGroup(ASTNode expressionTree, Tree withinGroupNode) throws SemanticException { Tree functionNameNode = expressionTree.getChild(0); - if (!FunctionRegistry.supportsWithinGroup(functionNameNode.getText())) { + if (!FunctionRegistry.isOrderedAggregate(functionNameNode.getText())) { throw new SemanticException(ErrorMsg.WITHIN_GROUP_NOT_ALLOWED, functionNameNode.getText()); } - Tree tabSortColNameNode = withinGroupNode.getChild(0); - ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0); - expressionTree.deleteChild(withinGroupNode.getChildIndex()); - // backward compatibility: the sortkey is the first paramater of the 
percentile_cont and percentile_disc functions - expressionTree.insertChild(1, sortKey); - expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, - Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); + List parameters = new ArrayList<>(expressionTree.getChildCount() - 2); + for (int i = 1; i < expressionTree.getChildCount() - 1; ++i) { + parameters.add(expressionTree.getChild(i)); + } + while (expressionTree.getChildCount() > 1) { + expressionTree.deleteChild(1); + } + + Tree orderByNode = withinGroupNode.getChild(0); + if (parameters.size() != orderByNode.getChildCount()) { + throw new SemanticException(ErrorMsg.WITHIN_GROUP_PARAMETER_MISMATCH, + Integer.toString(parameters.size()), Integer.toString(orderByNode.getChildCount())); + } + + for (int i = 0; i < orderByNode.getChildCount(); ++i) { + expressionTree.addChild(parameters.get(i)); + Tree tabSortColNameNode = orderByNode.getChild(i); + Tree nullsNode = tabSortColNameNode.getChild(0); + ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0); + expressionTree.addChild(sortKey); + expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, + Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); + expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, + Integer.toString(NullOrdering.fromToken(nullsNode.getType()).getCode()))); + } } private List doPhase1GetDistinctFuncExprs(Map aggregationTrees) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java index d0c155ff2d..242a41328d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf.generic; +import static 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory. + writableDoubleObjectInspector; + import java.util.ArrayList; import java.util.List; @@ -27,7 +30,6 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; @Description( @@ -41,20 +43,25 @@ supportsWindow = false, pivotResult = true, rankingFunction = true, - impliesOrder = true) + orderedAggregate = true) public class GenericUDAFCumeDist extends GenericUDAFRank { @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() { + protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() { return new GenericUDAFCumeDistEvaluator(); } + @Override + protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() { + return new GenericUDAFHypotheticalSetCumeDistEvaluator(); + } + public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator { @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); return ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + .getStandardListObjectInspector(writableDoubleObjectInspector); } @Override @@ -91,4 +98,26 @@ public Object terminate(AggregationBuffer agg) throws HiveException { return distances; } } + + /** + * Evaluator for calculating the cumulative distribution. 
+ * SELECT cume_dist(expression) WITHIN GROUP (ORDER BY col1) + * Implementation is based on hypothetical rank calculation: (rank + 1) / (count + 1) + * Differences: + * - rows whose column value equals the specified expression value should be counted in the rank + * - the return value type of this function is double. + */ + public static class GenericUDAFHypotheticalSetCumeDistEvaluator + extends GenericUDAFHypotheticalSetRankEvaluator { + + public GenericUDAFHypotheticalSetCumeDistEvaluator() { + super(true, PARTIAL_RANK_OI, writableDoubleObjectInspector); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + return new DoubleWritable((rankBuffer.rank + 1.0) / (rankBuffer.rowCount + 1.0)); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java index 992f5bfd21..7fb50ecc00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java @@ -18,8 +18,22 @@ package org.apache.hadoop.hive.ql.udf.generic; +import static org.apache.hadoop.hive.ql.util.DirectionUtils.ASCENDING_CODE; +import static org.apache.hadoop.hive.ql.util.DirectionUtils.DESCENDING_CODE; +import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory. 
+ writableLongObjectInspector; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.LongWritable; @Description( name = "dense_rank", @@ -32,14 +46,19 @@ supportsWindow = false, pivotResult = true, rankingFunction = true, - impliesOrder = true) + orderedAggregate = true) public class GenericUDAFDenseRank extends GenericUDAFRank { @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() { + protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() { return new GenericUDAFDenseRankEvaluator(); } + @Override + protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() { + return new GenericUDAFHypotheticalSetDenseRankEvaluator(); + } + public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator { /* @@ -50,5 +69,111 @@ protected void nextRank(RankBuffer rb) { rb.currentRank++; } } -} + /** + * Evaluator for calculating the dense rank. + * SELECT dense_rank(expression1[, expressionn]*) WITHIN GROUP (ORDER BY col1[, coln]*) + * Implementation is based on hypothetical rank calculation but the group of values are considered distinct. + * Since the source of the input stream is not sorted, a HashSet is used to filter out duplicate values + * which can lead to OOM in large data sets. 
+ */ + public static class GenericUDAFHypotheticalSetDenseRankEvaluator extends GenericUDAFHypotheticalSetRankEvaluator { + + public GenericUDAFHypotheticalSetDenseRankEvaluator() { + super(false, writableLongObjectInspector, writableLongObjectInspector); + } + + @Override + protected void initPartial2AndFinalOI(ObjectInspector[] parameters) { + // nop + } + + private static final class RowData { + private final List columnValues; + + private RowData(List columnValues) { + this.columnValues = columnValues; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RowData rowData = (RowData) o; + return Objects.equals(columnValues, rowData.columnValues); + } + + @Override + public int hashCode() { + return Objects.hash(columnValues); + } + } + + private static class HypotheticalSetDenseRankBuffer extends AbstractAggregationBuffer { + protected Set elements = new HashSet<>(); + private long rank = 0; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new HypotheticalSetDenseRankBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg; + rankBuffer.elements.clear(); + rankBuffer.rank = 0; + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg; + + CompareResult compareResult = compare(parameters); + if (compareResult.getCompareResult() == 0) { + return; + } + + if (compareResult.getOrder() == ASCENDING_CODE && compareResult.getCompareResult() < 0 || + compareResult.getOrder() == DESCENDING_CODE && compareResult.getCompareResult() > 0) { + List columnValues = new ArrayList<>(parameters.length / 4); + for (int i = 0; i < parameters.length / 4; ++i) { + 
columnValues.add(parameters[i * 4 + 1]); + } + RowData rowData = new RowData(columnValues); + if (!rankBuffer.elements.contains(rowData)) { + rankBuffer.elements.add(rowData); + rankBuffer.rank++; + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg; + return new LongWritable(rankBuffer.rank + 1); + } + + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial == null) { + return; + } + + HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg; + rankBuffer.rank += ((LongWritable)partial).get() - 1; + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg; + return new LongWritable(rankBuffer.rank + 1); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java index 64e9c8b7ca..49fd037ac9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf.generic; +import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory. 
+ writableDoubleObjectInspector; + import java.util.ArrayList; import org.slf4j.Logger; @@ -28,7 +31,6 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IntWritable; @Description( @@ -40,23 +42,28 @@ supportsWindow = false, pivotResult = true, rankingFunction = true, - impliesOrder = true) + orderedAggregate = true) public class GenericUDAFPercentRank extends GenericUDAFRank { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFPercentRank.class.getName()); @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() { + protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() { return new GenericUDAFPercentRankEvaluator(); } + @Override + protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() { + return new GenericUDAFHypotheticalSetPercentRankEvaluator(); + } + public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator { @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); return ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + writableDoubleObjectInspector); } @Override @@ -76,5 +83,23 @@ public Object terminate(AggregationBuffer agg) throws HiveException { return pranks; } } + + /** + * Evaluator for calculating the percent rank. 
+ * SELECT percent_rank(expression1[, expressionn]*) WITHIN GROUP (ORDER BY col1[, coln]*) + * Implementation is based on hypothetical rank calculation: (rank - 1) / count + */ + public static class GenericUDAFHypotheticalSetPercentRankEvaluator extends GenericUDAFHypotheticalSetRankEvaluator { + + public GenericUDAFHypotheticalSetPercentRankEvaluator() { + super(false, PARTIAL_RANK_OI, writableDoubleObjectInspector); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + return new DoubleWritable(((double)rankBuffer.rank) / rankBuffer.rowCount); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java index ad61410180..f7fa280187 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java @@ -63,7 +63,7 @@ @WindowFunctionDescription( supportsWindow = false, pivotResult = true, - supportsWithinGroup = true) + orderedAggregate = true) public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver { private static final Comparator LONG_COMPARATOR; @@ -81,21 +81,35 @@ public int compare(DoubleWritable o1, DoubleWritable o2) { @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - validateParameterTypes(parameters); + if (parameters.length == 2) { // column ref, expression (0 <= percentile <= 1) + return getGenericUDAFEvaluator(parameters[0], parameters[1]); + } else if (parameters.length == 4) { + // expression (0 <= percentile <= 1), order by column ref, order direction, null ordering + return getGenericUDAFEvaluator(parameters[1], parameters[0]); + } else { + throw new UDFArgumentTypeException(parameters.length - 1, "Only 1 argument and a single order column " + 
+ "reference expected."); + } + } + + private GenericUDAFEvaluator getGenericUDAFEvaluator(TypeInfo orderByColumn, TypeInfo percentile) + throws UDFArgumentTypeException { + if (orderByColumn.getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + orderByColumn.getTypeName() + " is passed."); + } - switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + switch (((PrimitiveTypeInfo) orderByColumn).getPrimitiveCategory()) { case BYTE: case SHORT: case INT: case LONG: case VOID: - return parameters[1].getCategory() == ObjectInspector.Category.LIST ? - new PercentileContLongArrayEvaluator() : new PercentileContLongEvaluator(); + return createLongEvaluator(percentile); case FLOAT: case DOUBLE: case DECIMAL: - return parameters[1].getCategory() == ObjectInspector.Category.LIST ? - new PercentileContDoubleArrayEvaluator() : new PercentileContDoubleEvaluator(); + return createDoubleEvaluator(percentile); case STRING: case TIMESTAMP: case VARCHAR: @@ -104,22 +118,18 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticE case DATE: default: throw new UDFArgumentTypeException(0, - "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + "Only numeric arguments are accepted but " + orderByColumn.getTypeName() + " is passed."); } } - protected void validateParameterTypes(TypeInfo[] parameters) throws UDFArgumentTypeException { - if (parameters.length < 2) { - throw new UDFArgumentTypeException(parameters.length - 1, "Not enough arguments."); - } - if (parameters.length > 3) { - throw new UDFArgumentTypeException(parameters.length - 1, "Too many arguments."); - } + protected GenericUDAFEvaluator createLongEvaluator(TypeInfo percentile) { + return percentile.getCategory() == ObjectInspector.Category.LIST ? 
+ new PercentileContLongArrayEvaluator() : new PercentileContLongEvaluator(); + } - if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } + protected GenericUDAFEvaluator createDoubleEvaluator(TypeInfo percentile) { + return percentile.getCategory() == ObjectInspector.Category.LIST ? + new PercentileContDoubleArrayEvaluator() : new PercentileContDoubleEvaluator(); } /** @@ -196,7 +206,15 @@ protected PercentileContEvaluator(Comparator> comparator, public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); - initInspectors(parameters); + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data + if (parameters.length == 2) { // Order direction was not given, default to asc + initInspectors((PrimitiveObjectInspector) parameters[0]); + } else { + initInspectors((PrimitiveObjectInspector) parameters[1], (WritableConstantIntObjectInspector) parameters[2]); + } + } else { // ...for partial result as input + initPartialInspectors((StructObjectInspector) parameters[0]); + } if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// ...for partial result partialResult = new Object[3]; @@ -229,25 +247,29 @@ private void sortEntries(List> entriesList, boolean isAsc entriesList.sort(isAscending ? comparator : comparator.reversed()); } - protected void initInspectors(ObjectInspector[] parameters) { - if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data - inputOI = (PrimitiveObjectInspector) parameters[0]; - if (parameters.length == 2) { // Order direction was not given, default to asc - isAscending = true; - } else { - isAscending = ((WritableConstantIntObjectInspector) parameters[2]). 
- getWritableConstantValue().get() != DESCENDING_CODE; - } - } else { // ...for partial result as input - soi = (StructObjectInspector) parameters[0]; + // ...for real input data, no order direction + protected void initInspectors(PrimitiveObjectInspector orderByColumnOI) { + inputOI = orderByColumnOI; + isAscending = true; + } - countsField = soi.getStructFieldRef("counts"); - percentilesField = soi.getStructFieldRef("percentiles"); - isAscendingField = soi.getStructFieldRef("isAscending"); + // ...for real input data, with order direction + protected void initInspectors( + PrimitiveObjectInspector orderByColumnOI, WritableConstantIntObjectInspector orderDirectionOI) { + inputOI = orderByColumnOI; + isAscending = orderDirectionOI.getWritableConstantValue().get() != DESCENDING_CODE; + } - countsOI = (MapObjectInspector) countsField.getFieldObjectInspector(); - percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector(); - } + // ...for partial result as input + protected void initPartialInspectors(StructObjectInspector objectInspector) { + soi = objectInspector; + + countsField = soi.getStructFieldRef("counts"); + percentilesField = soi.getStructFieldRef("percentiles"); + isAscendingField = soi.getStructFieldRef("isAscending"); + + countsOI = (MapObjectInspector) countsField.getFieldObjectInspector(); + percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector(); } @Override @@ -268,16 +290,23 @@ public void reset(AggregationBuffer agg) throws HiveException { @Override public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { PercentileAgg percAgg = (PercentileAgg) agg; + if (parameters.length == 4) { + iterate(percAgg, parameters[0], parameters[1]); + } else { + iterate(percAgg, parameters[1], parameters[0]); + } + } + private void iterate(PercentileAgg percAgg, Object percentiles, Object oderByColumnValue) { if (percAgg.percentiles == null) { - percAgg.percentiles = 
converter.convertPercentileParameter(parameters[1]); + percAgg.percentiles = converter.convertPercentileParameter(percentiles); } - if (parameters[0] == null) { + if (oderByColumnValue == null) { return; } - T input = getInput(parameters[0], inputOI); + T input = getInput(oderByColumnValue, inputOI); if (input != null) { increment(percAgg, wrapInput(input), 1); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java index c8d3c12c80..8ae8ca21ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java @@ -22,12 +22,9 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; -import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; @@ -41,36 +38,19 @@ @WindowFunctionDescription( supportsWindow = false, pivotResult = true, - supportsWithinGroup = true) + orderedAggregate = true) public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont { @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - validateParameterTypes(parameters); - - switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case VOID: - return parameters[1].getCategory() == ObjectInspector.Category.LIST ? 
- new PercentileDiscLongArrayEvaluator() : new PercentileDiscLongEvaluator(); - case FLOAT: - case DOUBLE: - case DECIMAL: - return parameters[1].getCategory() == ObjectInspector.Category.LIST ? - new PercentileDiscDoubleArrayEvaluator() : new PercentileDiscDoubleEvaluator(); - case STRING: - case TIMESTAMP: - case VARCHAR: - case CHAR: - case BOOLEAN: - case DATE: - default: - throw new UDFArgumentTypeException(0, - "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed."); - } + protected GenericUDAFEvaluator createLongEvaluator(TypeInfo percentile) { + return percentile.getCategory() == ObjectInspector.Category.LIST ? + new PercentileDiscLongArrayEvaluator() : new PercentileDiscLongEvaluator(); + } + + @Override + protected GenericUDAFEvaluator createDoubleEvaluator(TypeInfo percentile) { + return percentile.getCategory() == ObjectInspector.Category.LIST ? + new PercentileDiscDoubleArrayEvaluator() : new PercentileDiscDoubleEvaluator(); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java index 13e2f537cd..644e7283c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java @@ -18,8 +18,26 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.util.ArrayList; +import static java.util.Arrays.asList; +import static org.apache.hadoop.hive.ql.util.DirectionUtils.ASCENDING_CODE; +import static org.apache.hadoop.hive.ql.util.DirectionUtils.DESCENDING_CODE; +import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory. 
+ writableLongObjectInspector; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.ql.util.NullOrdering; +import org.apache.hadoop.hive.serde2.objectinspector.FullMapEqualComparer; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.LongWritable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Description; @@ -34,42 +52,70 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; @Description( name = "rank", value = "_FUNC_(x)") @WindowFunctionDescription( - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true) + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + orderedAggregate = true) public class GenericUDAFRank extends AbstractGenericUDAFResolver { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFRank.class.getName()); @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws 
SemanticException { + if (info.isWindowing()) { + return getWindowingEvaluator(info.getParameterObjectInspectors()); + } + return getHypotheticalSetEvaluator(info.getParameterObjectInspectors()); + } + + private GenericUDAFEvaluator getWindowingEvaluator(ObjectInspector[] parameters) throws SemanticException { if (parameters.length < 1) { throw new UDFArgumentTypeException(parameters.length - 1, "One or more arguments are expected."); } for (int i = 0; i < parameters.length; i++) { - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[i]); - if (!ObjectInspectorUtils.compareSupported(oi)) { - throw new UDFArgumentTypeException(i, - "Cannot support comparison of map<> type or complex type containing map<>."); - } + supportsCompare(parameters, i); } - return createEvaluator(); + return createWindowingEvaluator(); } - protected GenericUDAFAbstractRankEvaluator createEvaluator() { + protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() { return new GenericUDAFRankEvaluator(); } + private GenericUDAFEvaluator getHypotheticalSetEvaluator(ObjectInspector[] parameters) throws SemanticException { + if (parameters.length % 4 != 0) { + throw new UDFArgumentTypeException(parameters.length, + "Invalid number of parameters: " + + "the number of hypothetical direct arguments must match the number of ordering columns"); + } + + for (int i = 0; i < parameters.length / 4; ++i) { + supportsCompare(parameters, 4 * i); + supportsCompare(parameters, 4 * i + 1); + } + + return createHypotheticalSetEvaluator(); + } + + protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() { + return new GenericUDAFHypotheticalSetRankEvaluator(); + } + + private void supportsCompare(ObjectInspector[] parameters, int i2) throws UDFArgumentTypeException { + ObjectInspector oi = parameters[i2]; + if (!ObjectInspectorUtils.compareSupported(oi)) { + throw new UDFArgumentTypeException(i2, + "Cannot support comparison of map<> type 
or complex type containing map<>."); + } + } + static class RankBuffer implements AggregationBuffer { ArrayList rowNums; @@ -221,5 +267,210 @@ public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2, return out; } + + + /** + * Hypothetical rank calculation. + * Calculates the rank of a hypothetical row specified by the arguments of the + * function in a group of values specified by the order by clause. + * SELECT rank(expression1[, expressionn]*) WITHIN GROUP (ORDER BY col1[, coln]*) + * (the number of rows where col1 < expression1 [and coln < expressionn]*) + 1 + */ + public static class GenericUDAFHypotheticalSetRankEvaluator extends GenericUDAFEvaluator { + public static final String RANK_FIELD = "rank"; + public static final String COUNT_FIELD = "count"; + public static final ObjectInspector PARTIAL_RANK_OI = ObjectInspectorFactory.getStandardStructObjectInspector( + asList(RANK_FIELD, COUNT_FIELD), + asList(writableLongObjectInspector, + writableLongObjectInspector)); + + protected static class HypotheticalSetRankBuffer extends AbstractAggregationBuffer { + protected long rank = 0; + protected long rowCount = 0; + + @Override + public int estimate() { + return JavaDataModel.PRIMITIVES2 * 2; + } + } + + protected static class RankAssets { + private final ObjectInspector commonInputOI; + private final ObjectInspectorConverters.Converter directArgumentConverter; + private final ObjectInspectorConverters.Converter inputConverter; + protected final int order; + private final NullOrdering nullOrdering; + + public RankAssets(ObjectInspector commonInputOI, + ObjectInspectorConverters.Converter directArgumentConverter, + ObjectInspectorConverters.Converter inputConverter, + int order, NullOrdering nullOrdering) { + this.commonInputOI = commonInputOI; + this.directArgumentConverter = directArgumentConverter; + this.inputConverter = inputConverter; + this.order = order; + this.nullOrdering = nullOrdering; + } + + public int compare(Object inputValue, 
Object directArgumentValue) { + return ObjectInspectorUtils.compare(inputConverter.convert(inputValue), commonInputOI, + directArgumentConverter.convert(directArgumentValue), commonInputOI, + new FullMapEqualComparer(), nullOrdering.getNullValueOption()); + } + } + + public GenericUDAFHypotheticalSetRankEvaluator() { + this(false, PARTIAL_RANK_OI, writableLongObjectInspector); + } + + public GenericUDAFHypotheticalSetRankEvaluator( + boolean allowEquality, ObjectInspector partialOutputOI, ObjectInspector finalOI) { + this.allowEquality = allowEquality; + this.partialOutputOI = partialOutputOI; + this.finalOI = finalOI; + } + + private final transient boolean allowEquality; + private final transient ObjectInspector partialOutputOI; + private final transient ObjectInspector finalOI; + private transient List rankAssetsList; + private transient StructObjectInspector partialInputOI; + private transient StructField partialInputRank; + private transient StructField partialInputCount; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + rankAssetsList = new ArrayList<>(parameters.length / 4); + for (int i = 0; i < parameters.length / 4; ++i) { + TypeInfo directArgumentType = TypeInfoUtils.getTypeInfoFromObjectInspector(parameters[4 * i]); + TypeInfo inputType = TypeInfoUtils.getTypeInfoFromObjectInspector(parameters[4 * i + 1]); + TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForComparison(inputType, directArgumentType); + ObjectInspector commonInputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo); + rankAssetsList.add(new RankAssets( + commonInputOI, + ObjectInspectorConverters.getConverter(parameters[4 * i], commonInputOI), + ObjectInspectorConverters.getConverter(parameters[4 * i + 1], commonInputOI), + ((WritableConstantIntObjectInspector) parameters[4 * i + 2]). 
+ getWritableConstantValue().get(), + NullOrdering.fromCode(((WritableConstantIntObjectInspector) parameters[4 * i + 3]). + getWritableConstantValue().get()))); + } + } else { + initPartial2AndFinalOI(parameters); + } + + if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { + return partialOutputOI; + } + + return finalOI; + } + + protected void initPartial2AndFinalOI(ObjectInspector[] parameters) { + partialInputOI = (StructObjectInspector) parameters[0]; + partialInputRank = partialInputOI.getStructFieldRef(RANK_FIELD); + partialInputCount = partialInputOI.getStructFieldRef(COUNT_FIELD); + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new HypotheticalSetRankBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + rankBuffer.rank = 0; + rankBuffer.rowCount = 0; + } + + protected static class CompareResult { + private final int compareResult; + private final int order; + + public CompareResult(int compareResult, int order) { + this.compareResult = compareResult; + this.order = order; + } + + public int getCompareResult() { + return compareResult; + } + + public int getOrder() { + return order; + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + rankBuffer.rowCount++; + + CompareResult compareResult = compare(parameters); + + if (compareResult.getCompareResult() == 0) { + if (allowEquality) { + rankBuffer.rank++; + } + return; + } + + if (compareResult.getOrder() == ASCENDING_CODE && compareResult.getCompareResult() < 0 || + compareResult.getOrder() == DESCENDING_CODE && compareResult.getCompareResult() > 0) { + rankBuffer.rank++; + } + } + + protected CompareResult compare(Object[] parameters) { + int i = 0; + int c = 0; + for (RankAssets rankAssets : rankAssetsList) 
{ + c = rankAssets.compare(parameters[4 * i + 1], parameters[4 * i]); + if (c != 0) { + break; + } + ++i; + } + + if (c == 0) { + return new CompareResult(c, -1); + } + + return new CompareResult(c, rankAssetsList.get(i).order); + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + LongWritable[] result = new LongWritable[2]; + result[0] = new LongWritable(rankBuffer.rank + 1); + result[1] = new LongWritable(rankBuffer.rowCount); + return result; + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial == null) { + return; + } + + Object objRank = partialInputOI.getStructFieldData(partial, partialInputRank); + Object objCount = partialInputOI.getStructFieldData(partial, partialInputCount); + + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + rankBuffer.rank += ((LongWritable)objRank).get() - 1; + rankBuffer.rowCount += ((LongWritable)objCount).get(); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg; + return new LongWritable(rankBuffer.rank + 1); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java new file mode 100644 index 0000000000..6bf1db272a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.util; + +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption; + +/** + * Enum for converting different Null ordering description types. + */ +public enum NullOrdering { + NULLS_FIRST(1, HiveParser.TOK_NULLS_FIRST, NullValueOption.MAXVALUE), + NULLS_LAST(0, HiveParser.TOK_NULLS_LAST, NullValueOption.MINVALUE); + + NullOrdering(int code, int token, NullValueOption nullValueOption) { + this.code = code; + this.token = token; + this.nullValueOption = nullValueOption; + } + + private final int code; + private final int token; + private final NullValueOption nullValueOption; + + public static NullOrdering fromToken(int token) { + for (NullOrdering nullOrdering : NullOrdering.values()) { + if (nullOrdering.token == token) { + return nullOrdering; + } + } + throw new EnumConstantNotPresentException(NullOrdering.class, "No enum constant present with token " + token); + } + + public static NullOrdering fromCode(int code) { + for (NullOrdering nullOrdering : NullOrdering.values()) { + if (nullOrdering.code == code) { + return nullOrdering; + } + } + throw new EnumConstantNotPresentException(NullOrdering.class, "No enum constant present with code " + code); + } + + public int getCode() { + return code; + } + + public int getToken() { + return token; + } + + public NullValueOption getNullValueOption() { + return nullValueOption; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index dead3ec472..cd6c2ee37d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -429,10 +429,6 @@ public void testIsRankingFunction() throws Exception { @Test public void testImpliesOrder() throws Exception { - Assert.assertTrue(FunctionRegistry.impliesOrder("rank")); - Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank")); - Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank")); - Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist")); Assert.assertTrue(FunctionRegistry.impliesOrder("first_value")); Assert.assertTrue(FunctionRegistry.impliesOrder("last_value")); Assert.assertTrue(FunctionRegistry.impliesOrder("lead")); diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java index 9d44ed87e9..fb7699e0e4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java @@ -49,31 +49,22 @@ public void testParsePercentileCont() throws Exception { ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); assertEquals(HiveParser.TOK_WITHIN_GROUP, withinGroupNode.getType()); - ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); - assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType()); - } - @Test - public void testParsePercentileContAsc() throws Exception { - ASTNode tree = parseDriver.parseSelect( - "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val ASC) FROM src", null); - ASTNode selExprNode = (ASTNode) tree.getChild(0); - ASTNode functionNode = (ASTNode) selExprNode.getChild(0); - ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); - ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); + ASTNode 
orderByNode = (ASTNode) withinGroupNode.getChild(0); + assertEquals(HiveParser.TOK_ORDERBY, orderByNode.getType()); + + ASTNode tabSortColNameNode = (ASTNode) orderByNode.getChild(0); assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType()); } @Test - public void testParsePercentileContDesc() throws Exception { + public void testParseMultipleColumnRefs() throws Exception { ASTNode tree = parseDriver.parseSelect( - "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val DESC) FROM src", null); - ASTNode selExpr = (ASTNode) tree.getChild(0); - ASTNode function = (ASTNode) selExpr.getChild(0); + "SELECT rank(3, 4) WITHIN GROUP (ORDER BY val, val2) FROM src", null); ASTNode selExprNode = (ASTNode) tree.getChild(0); ASTNode functionNode = (ASTNode) selExprNode.getChild(0); - ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2); - ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0); - assertEquals(HiveParser.TOK_TABSORTCOLNAMEDESC, tabSortColNameNode.getType()); + ASTNode withinGroupNode = (ASTNode) functionNode.getChild(3); + ASTNode orderByNode = (ASTNode) withinGroupNode.getChild(0); + assertEquals(2, orderByNode.getChildCount()); } } diff --git ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q new file mode 100644 index 0000000000..6b5f3765e9 --- /dev/null +++ ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q @@ -0,0 +1,315 @@ +DESCRIBE FUNCTION rank; +DESCRIBE FUNCTION EXTENDED rank; +DESCRIBE FUNCTION dense_rank; +DESCRIBE FUNCTION EXTENDED dense_rank; +DESCRIBE FUNCTION percent_rank; +DESCRIBE FUNCTION EXTENDED percent_rank; +DESCRIBE FUNCTION cume_dist; +DESCRIBE FUNCTION EXTENDED cume_dist; + + +CREATE TABLE t_test ( + col1 int, + col2 int +); +INSERT INTO t_test VALUES +(NULL, NULL), +(3, 0), +(5, 1), +(5, 1), +(5, 2), +(5, 3), +(10, 20.0), +(NULL, NULL), +(NULL, NULL), +(11, 10.0), +(15, 7.0), +(15, 15.0), +(15, 16.0), +(8, 
8.0), +(7, 7.0), +(8, 8.0), +(NULL, NULL); + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) 
WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), 
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + + +select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), 
+dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + + +select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) 
WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test; + +select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test; + +DROP TABLE t_test; diff --git ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out new file mode 100644 index 0000000000..3ea6f1f4e5 --- /dev/null +++ 
ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out @@ -0,0 +1,762 @@ +PREHOOK: query: DESCRIBE FUNCTION rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION rank +POSTHOOK: type: DESCFUNCTION +rank(x) +PREHOOK: query: DESCRIBE FUNCTION EXTENDED rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED rank +POSTHOOK: type: DESCFUNCTION +rank(x) +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION dense_rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION dense_rank +POSTHOOK: type: DESCFUNCTION +dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank +POSTHOOK: type: DESCFUNCTION +dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third. +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION percent_rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percent_rank +POSTHOOK: type: DESCFUNCTION +percent_rank(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather than row counts in its numerator. 
PERCENT_RANK of a row is calculated as: (rank of row in its partition - 1) / (number of rows in the partition - 1) +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percent_rank +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percent_rank +POSTHOOK: type: DESCFUNCTION +percent_rank(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather than row counts in its numerator. PERCENT_RANK of a row is calculated as: (rank of row in its partition - 1) / (number of rows in the partition - 1) +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentRank +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION cume_dist +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION cume_dist +POSTHOOK: type: DESCFUNCTION +cume_dist(x) - The CUME_DIST function (defined as the inverse of percentile in some statistical books) computes the position of a specified value relative to a set of values. To compute the CUME_DIST of a value x in a set S of size N, you use the formula: CUME_DIST(x) = number of values in S coming before and including x in the specified order/ N +PREHOOK: query: DESCRIBE FUNCTION EXTENDED cume_dist +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED cume_dist +POSTHOOK: type: DESCFUNCTION +cume_dist(x) - The CUME_DIST function (defined as the inverse of percentile in some statistical books) computes the position of a specified value relative to a set of values. 
To compute the CUME_DIST of a value x in a set S of size N, you use the formula: CUME_DIST(x) = number of values in S coming before and including x in the specified order/ N +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCumeDist +Function type:BUILTIN +PREHOOK: query: CREATE TABLE t_test ( + col1 int, + col2 int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test ( + col1 int, + col2 int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL), +(3, 0), +(5, 1), +(5, 1), +(5, 2), +(5, 3), +(10, 20.0), +(NULL, NULL), +(NULL, NULL), +(11, 10.0), +(15, 7.0), +(15, 15.0), +(15, 16.0), +(8, 8.0), +(7, 7.0), +(8, 8.0), +(NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL), +(3, 0), +(5, 1), +(5, 1), +(5, 2), +(5, 3), +(10, 20.0), +(NULL, NULL), +(NULL, NULL), +(11, 10.0), +(15, 7.0), +(15, 15.0), +(15, 16.0), +(8, 8.0), +(7, 7.0), +(8, 8.0), +(NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.col1 SCRIPT [] +POSTHOOK: Lineage: t_test.col2 SCRIPT [] +PREHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), 
+rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 5 6 6 10 10 11 +PREHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 6 6 9 10 10 11 +PREHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN 
GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 2 3 3 4 4 5 +PREHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 3 3 3 6 6 7 +PREHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select 
+percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471 +PREHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222 +PREHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP 
(ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 5 6 6 10 10 11 +PREHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 6 6 9 10 10 11 +PREHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), 
+dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 2 3 3 4 4 5 +PREHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 3 3 3 6 6 7 +PREHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY 
col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471 +PREHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.6111111111111112 
0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222 +PREHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 5 6 6 10 10 11 +PREHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@t_test +#### A masked pattern was here #### +5 5 6 6 9 10 10 11 +PREHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 2 3 3 4 4 5 +PREHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) 
WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 3 3 3 6 6 7 +PREHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471 +PREHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP 
(ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222 +PREHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1) WITHIN GROUP (ORDER BY col1), +rank(2) WITHIN GROUP (ORDER BY col1), +rank(3) WITHIN GROUP (ORDER BY col1), +rank(4) WITHIN GROUP (ORDER BY col1), +rank(5) WITHIN GROUP (ORDER BY col1), +rank(6) WITHIN GROUP (ORDER BY col1), +rank(7) WITHIN GROUP (ORDER BY col1), +rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 5 6 6 10 10 11 +PREHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +rank(1, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(2, 3) WITHIN 
GROUP (ORDER BY col1, col2), +rank(3, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(4, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(5, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(6, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(7, 3) WITHIN GROUP (ORDER BY col1, col2), +rank(8, 3) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 5 6 6 9 10 10 11 +PREHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +dense_rank(1) WITHIN GROUP (ORDER BY col1), +dense_rank(2) WITHIN GROUP (ORDER BY col1), +dense_rank(3) WITHIN GROUP (ORDER BY col1), +dense_rank(4) WITHIN GROUP (ORDER BY col1), +dense_rank(5) WITHIN GROUP (ORDER BY col1), +dense_rank(6) WITHIN GROUP (ORDER BY col1), +dense_rank(7) WITHIN GROUP (ORDER BY col1), +dense_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 2 3 3 4 4 5 +PREHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### 
+POSTHOOK: query: select +dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2), +dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +2 2 3 3 3 6 6 7 +PREHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +percent_rank(1) WITHIN GROUP (ORDER BY col1), +percent_rank(2) WITHIN GROUP (ORDER BY col1), +percent_rank(3) WITHIN GROUP (ORDER BY col1), +percent_rank(4) WITHIN GROUP (ORDER BY col1), +percent_rank(5) WITHIN GROUP (ORDER BY col1), +percent_rank(6) WITHIN GROUP (ORDER BY col1), +percent_rank(7) WITHIN GROUP (ORDER BY col1), +percent_rank(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471 +PREHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), 
+cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: select +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(2) WITHIN GROUP (ORDER BY col1), +cume_dist(3) WITHIN GROUP (ORDER BY col1), +cume_dist(4) WITHIN GROUP (ORDER BY col1), +cume_dist(5) WITHIN GROUP (ORDER BY col1), +cume_dist(6) WITHIN GROUP (ORDER BY col1), +cume_dist(7) WITHIN GROUP (ORDER BY col1), +cume_dist(8) WITHIN GROUP (ORDER BY col1) +from t_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git ql/src/test/results/clientpositive/udaf_percentile_cont.q.out ql/src/test/results/clientpositive/udaf_percentile_cont.q.out index f12cb6cd5e..5b02d24eb0 100644 --- ql/src/test/results/clientpositive/udaf_percentile_cont.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_cont.q.out @@ -508,23 +508,23 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: percentile_cont(_col0, 0), percentile_cont(_col0, 0.2), percentile_cont(_col0, 0.2, 1), percentile_cont(_col0, 0.2, 0) + aggregations: percentile_cont(_col0, 0), percentile_cont(_col0, 0.2), percentile_cont(0.2, _col0, 1, 0), percentile_cont(0.2, _col0, 1, 1), percentile_cont(0.2, _col0, 0, 0), percentile_cont(0.2, _col0, 0, 1) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: 
_col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct,percentiles:array,isascending:boolean>), _col1 (type: struct,percentiles:array,isascending:boolean>), _col2 (type: struct,percentiles:array,isascending:boolean>), _col3 (type: struct,percentiles:array,isascending:boolean>) + Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct,percentiles:array,isascending:boolean>), _col1 (type: struct,percentiles:array,isascending:boolean>), _col2 (type: struct,percentiles:array,isascending:boolean>), _col3 (type: struct,percentiles:array,isascending:boolean>), _col4 (type: struct,percentiles:array,isascending:boolean>), _col5 (type: struct,percentiles:array,isascending:boolean>) Reduce Operator Tree: Group By Operator - aggregations: percentile_cont(VALUE._col0), percentile_cont(VALUE._col1), percentile_cont(VALUE._col2), percentile_cont(VALUE._col3) + aggregations: percentile_cont(VALUE._col0), percentile_cont(VALUE._col1), percentile_cont(VALUE._col2), percentile_cont(VALUE._col3), percentile_cont(VALUE._col4), percentile_cont(VALUE._col5) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = 
_col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double) + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col4 (type: double), _col5 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/udaf_percentile_disc.q.out ql/src/test/results/clientpositive/udaf_percentile_disc.q.out index d10fee577c..41de8a2eb8 100644 --- ql/src/test/results/clientpositive/udaf_percentile_disc.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_disc.q.out @@ -508,23 +508,23 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: percentile_disc(_col0, 0), percentile_disc(_col0, 0.2), percentile_disc(_col0, 0.2, 1), percentile_disc(_col0, 0.2, 0) + aggregations: percentile_disc(_col0, 0), percentile_disc(_col0, 0.2), percentile_disc(0.2, _col0, 1, 0), percentile_disc(0.2, _col0, 1, 1), percentile_disc(0.2, _col0, 0, 0), percentile_disc(0.2, _col0, 0, 1) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct,percentiles:array,isascending:boolean>), _col1 (type: struct,percentiles:array,isascending:boolean>), _col2 (type: 
struct,percentiles:array,isascending:boolean>), _col3 (type: struct,percentiles:array,isascending:boolean>) + Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct,percentiles:array,isascending:boolean>), _col1 (type: struct,percentiles:array,isascending:boolean>), _col2 (type: struct,percentiles:array,isascending:boolean>), _col3 (type: struct,percentiles:array,isascending:boolean>), _col4 (type: struct,percentiles:array,isascending:boolean>), _col5 (type: struct,percentiles:array,isascending:boolean>) Reduce Operator Tree: Group By Operator - aggregations: percentile_disc(VALUE._col0), percentile_disc(VALUE._col1), percentile_disc(VALUE._col2), percentile_disc(VALUE._col3) + aggregations: percentile_disc(VALUE._col0), percentile_disc(VALUE._col1), percentile_disc(VALUE._col2), percentile_disc(VALUE._col3), percentile_disc(VALUE._col4), percentile_disc(VALUE._col5) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double) + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col4 (type: double), _col5 (type: double), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: 
COMPLETE File Output Operator