diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 10517ad777..f6c484aa2a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -67,7 +67,6 @@ import org.apache.hadoop.hive.ql.udf.UDFHex; import org.apache.hadoop.hive.ql.udf.UDFHour; import org.apache.hadoop.hive.ql.udf.UDFJson; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLength; import org.apache.hadoop.hive.ql.udf.UDFLike; import org.apache.hadoop.hive.ql.udf.UDFLn; import org.apache.hadoop.hive.ql.udf.UDFLog; @@ -463,6 +462,8 @@ system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats()); system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter()); system.registerUDAF("percentile", UDAFPercentile.class); + system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont()); + system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc()); // Generic UDFs diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java new file mode 100644 index 0000000000..d0e3cb35ae --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java @@ -0,0 +1,357 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.LongWritable; + +@Description(name = "percentile_cont", value = 
"_FUNC_(input, pc) - Returns the percentile of expr at pc (range: [0,1]).") +public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver { + + private static final Comparator COMPARATOR; // long comparator obtained from the Hadoop shims; orders the keys + + static { + COMPARATOR = ShimLoader.getHadoopShims().getLongComparator(); + } + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { // parameters: [0] input, [1] pc + if (parameters.length != 2) { + throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case VOID: // accepted together with the integral types; presumably the type of an untyped NULL argument + return new PercentileContLongEvaluator(); + case TIMESTAMP: // every category listed from here on is rejected, exactly like default + case FLOAT: + case DOUBLE: + case STRING: + case VARCHAR: + case CHAR: + case DECIMAL: + case BOOLEAN: + case DATE: + default: + throw new UDFArgumentTypeException(0, + "Only integral type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + } + } + + /** + * Orders the entries of the counts map by key, delegating to the long comparator obtained from the Hadoop shims. + */ + public static class MyComparator implements Comparator> { + @Override + public int compare(Map.Entry o1, + Map.Entry o2) { + return COMPARATOR.compare(o1.getKey(), o2.getKey()); + } + } + + /** + * The evaluator for percentile computation based on long. 
+ */ + public static class PercentileContLongEvaluator extends GenericUDAFEvaluator { + PercentileCalculator calc = new PercentileContCalculator(); + + /** + * A state class to store intermediate aggregation results. 
+ */ + public static class PercentileAgg extends AbstractAggregationBuffer { + Map counts; + List percentiles; + } + + // For PARTIAL1 and COMPLETE: inspectors of the raw (input, pc) arguments + protected PrimitiveObjectInspector inputOI; + protected PrimitiveObjectInspector percentileOI; + + // For PARTIAL1 and PARTIAL2 + protected transient Object[] partialResult; + + // FINAL and COMPLETE output + DoubleWritable result; + + // PARTIAL2 and FINAL inputs: the partial-result struct and the inspectors of its two fields + private transient StructObjectInspector soi; + private transient StructField countsField; + private transient StructField percentilesField; + MapObjectInspector countsOI; + ListObjectInspector percentilesOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + + // init inspectors... + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data + inputOI = (PrimitiveObjectInspector) parameters[0]; + percentileOI = (PrimitiveObjectInspector) parameters[1]; // kept so iterate() can read pc whatever its primitive type + } else { // ...for partial result as input + soi = (StructObjectInspector) parameters[0]; + + countsField = soi.getStructFieldRef("counts"); + percentilesField = soi.getStructFieldRef("percentiles"); + + countsOI = (MapObjectInspector) countsField.getFieldObjectInspector(); + percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector(); + } + + // init inspectors... 
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// ...for partial result + partialResult = new Object[2]; + + ArrayList foi = new ArrayList(); + + foi.add(ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector, + PrimitiveObjectInspectorFactory.writableLongObjectInspector)); + foi.add(ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + + ArrayList fname = new ArrayList(); + fname.add("counts"); + fname.add("percentiles"); + + return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } else { // ...for final result + result = new DoubleWritable(0); + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + PercentileAgg percAgg = (PercentileAgg) agg; + partialResult[0] = percAgg.counts; // emitted in the order declared by init(): counts, then percentiles + partialResult[1] = percAgg.percentiles; + + return partialResult; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new PercentileAgg(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + PercentileAgg percAgg = (PercentileAgg) agg; + if (percAgg.counts != null) { + percAgg.counts.clear(); // only the counts are cleared; percentiles is left as it is + } + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + PercentileAgg percAgg = (PercentileAgg) agg; + + if (percAgg.percentiles == null) { // pc is read and validated once per buffer, through its inspector instead of a hard cast + if (parameters[1] == null) { + throw new HiveException("Percentile value must not be NULL."); + } + double percentile = PrimitiveObjectInspectorUtils.getDouble(parameters[1], percentileOI); + validatePercentile(percentile); + percAgg.percentiles = new ArrayList(1); + percAgg.percentiles.add(new DoubleWritable(percentile)); + } + + if (parameters[0] == null) { + return; + } 
+ increment(percAgg, new LongWritable(PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI)), 1); + } + + private void validatePercentile(Double percentile) { + if (percentile < 0.0 || percentile > 1.0) { + throw new RuntimeException("Percentile value must be within the range of 0 to 1."); + } + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial == null) { + return; + } + + Object objCounts = soi.getStructFieldData(partial, countsField); + Object objPercentiles = soi.getStructFieldData(partial, percentilesField); + + Map counts = + (Map) countsOI.getMap(objCounts); + List percentiles = + (List) percentilesOI.getList(objPercentiles); + + if (counts == null || percentiles == null) { + return; + } + + PercentileAgg percAgg = (PercentileAgg) agg; + + if (percAgg.percentiles == null) { + percAgg.percentiles = new ArrayList(percentiles); + } + + for (Map.Entry e : counts.entrySet()) { + increment(percAgg, e.getKey(), e.getValue().get()); + } + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + PercentileAgg percAgg = (PercentileAgg) agg; + + // No input data. + if (percAgg.counts == null || percAgg.counts.size() == 0) { + return null; + } + + // Copy all entries into a list and sort them by key. + Set> entries = percAgg.counts.entrySet(); + List> entriesList = + new ArrayList>(entries); + Collections.sort(entriesList, new MyComparator()); + + // Accumulate the counts. + long total = getTotal(entriesList); + + // Initialize the result. 
+ if (result == null) { + result = new DoubleWritable(); + } + + calculatePercentile(percAgg, entriesList, total); + + return result; + } + + protected void calculatePercentile(PercentileAgg percAgg, + List> entriesList, long total) { + // maxPosition is the zero-based index of the last value, i.e. the position of the 1.0 percentile + long maxPosition = total - 1; + double position = maxPosition * percAgg.percentiles.get(0).get(); + result.set(calc.getPercentile(entriesList, position)); + } + + public static long getTotal(List> entriesList) { + long total = 0; + for (int i = 0; i < entriesList.size(); i++) { + LongWritable count = entriesList.get(i).getValue(); + total += count.get(); + count.set(total); // overwrites the per-key count with the running total, mutating the entry in place + } + return total; + } + + /** + * Adds i to the count stored in s for the key input, creating the counts map on first use. + */ + void increment(PercentileAgg s, LongWritable input, long i) { + if (s.counts == null) { + s.counts = new HashMap(); + } + LongWritable count = s.counts.get(input); + if (count == null) { + // We have to create a new object, because the object input belongs + // to the code that creates it and may get its value changed. + LongWritable key = new LongWritable(input.get()); + s.counts.put(key, new LongWritable(i)); + } else { + count.set(count.get() + i); + } + } + } + + public static interface PercentileCalculator { + double getPercentile(List> entriesList, double position); + } + + public static class PercentileContCalculator implements PercentileCalculator { + /** + * Get the percentile value at the given zero-based position; terminate() passes entries whose values were turned into accumulated counts by getTotal(). + */ + public double getPercentile(List> entriesList, + double position) { + // We may need to do linear interpolation to get the exact percentile + long lower = (long) Math.floor(position); + long higher = (long) Math.ceil(position); + + // Linear search since this won't take much time from the total execution anyway + // lower has the range of [0 .. total-1] + // The first entry with accumulated count (lower+1) corresponds to the lower position. 
+ int i = 0; + while (entriesList.get(i).getValue().get() < lower + 1) { + i++; + } + + long lowerKey = entriesList.get(i).getKey().get(); + if (higher == lower) { + // no interpolation needed because position does not have a fraction + return lowerKey; + } + + if (entriesList.get(i).getValue().get() < higher + 1) { // the entry holding the lower position does not reach the higher one + i++; + } + long higherKey = entriesList.get(i).getKey().get(); + + if (higherKey == lowerKey) { + // no interpolation needed because the lower position and the higher position have the same key + return lowerKey; + } + + // Linear interpolation to get the exact percentile + return (higher - position) * lowerKey + (position - lower) * higherKey; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java new file mode 100644 index 0000000000..70f66ac6ff --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator.PercentileAgg; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.LongWritable; + +@Description(name = "percentile_disc", value = "_FUNC_(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation.") +public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length != 2) { + 
throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 arguments are expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + case VOID: // accepted together with the integral types + return new PercentileDiscLongEvaluator(); + case TIMESTAMP: // every category listed from here on is rejected, exactly like default + case FLOAT: + case DOUBLE: + case STRING: + case VARCHAR: + case CHAR: + case DECIMAL: + case BOOLEAN: + case DATE: + default: + throw new UDFArgumentTypeException(0, + "Only integral type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); + } + } + + /** + * The evaluator for discrete percentile computation based on long; it inherits the aggregation logic of PercentileContLongEvaluator. + */ + public static class PercentileDiscLongEvaluator extends PercentileContLongEvaluator { + public PercentileDiscLongEvaluator() { calc = new PercentileDiscCalculator(); } // assigns the inherited field instead of hiding it + + @Override + protected void calculatePercentile(PercentileAgg percAgg, + List> entriesList, long total) { + // maxPosition is the zero-based index of the last value, i.e. the position of the 1.0 percentile + long maxPosition = total - 1; + double position = maxPosition * percAgg.percentiles.get(0).get(); + result.set(calc.getPercentile(entriesList, position)); + } + } + + public static class PercentileDiscCalculator implements PercentileCalculator { + /** + * Get the percentile value at the given zero-based position without interpolating between keys. + */ + public double getPercentile(List> entriesList, + double position) { + // No interpolation: a fractional position yields the key at the higher (ceil) position. NOTE(review): SQL-standard PERCENTILE_DISC returns the first value whose cumulative distribution is >= pc, which can differ (e.g. pc = 0.51 over 1..5) - confirm this is intended + long lower = (long) Math.floor(position); + long higher = (long) Math.ceil(position); + + // Linear search since this won't take much time from the total execution anyway + // lower has the range of [0 .. total-1] + // The first entry with accumulated count (lower+1) corresponds to the lower position. 
+ int i = 0; + while (entriesList.get(i).getValue().get() < lower + 1) { + i++; + } + + long lowerKey = entriesList.get(i).getKey().get(); + if (higher == lower) { + // position has no fractional part, so the key found for the lower position is the result + return lowerKey; + } + + if (entriesList.get(i).getValue().get() < higher + 1) { // the entry holding the lower position does not reach the higher one, so step to the next key + i++; + } + return entriesList.get(i).getKey().get(); + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java new file mode 100644 index 0000000000..f011e42285 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileCalculator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContCalculator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator.PercentileAgg; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +public class TestGenericUDAFPercentileCont { + PercentileCalculator calc = new PercentileContCalculator(); + + @Test + public void testNoInterpolation() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.5, 3); // position = 4 * 0.5 = 2, a whole number + } + + @Test + public void testInterpolateLower() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.49, 2.96); // position = 4 * 0.49 = 1.96 -> 0.04 * 2 + 0.96 * 3 + } + + @Test + public void testInterpolateHigher() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.51, 3.04); // position = 4 * 0.51 = 2.04 -> 0.96 * 3 + 0.04 * 4 + } + + @Test + public void testSingleItem50() throws Exception { + Long[] items = new Long[] { 1L }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testSingleItem100() throws Exception { + Long[] items = new Long[] { 1L }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_cont(.50) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostresRefExample() throws Exception { + Long[] items = new Long[] { 54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.5, 54); // position = 6 * 0.5 = 3 -> fourth value in key order + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), 
(78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_cont(.72) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostresRefExample2() throws Exception { + Long[] items = new Long[] { 54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.72, 76.64); // position = 6 * 0.72 = 4.32 -> 0.68 * 76 + 0.32 * 78 + } + + private void checkPercentile(Long[] items, double percentile, double expected) throws Exception { // fills a buffer by hand (no iterate/merge) and checks terminate() + PercentileContLongEvaluator eval = new GenericUDAFPercentileCont.PercentileContLongEvaluator(); + + PercentileAgg agg = new PercentileAgg(); + + agg.percentiles = new ArrayList(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new LongWritable(items[i]), 1); + } + + DoubleWritable result = (DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java new file mode 100644 index 0000000000..9b491d1c5e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileCalculator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc.PercentileDiscCalculator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator.PercentileAgg; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; + +public class TestGenericUDAFPercentileDisc { + PercentileCalculator calc = new PercentileDiscCalculator(); // the discrete calculator, matching the class under test + + @Test + public void testNoInterpolation() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.5, 3); // position = 4 * 0.5 = 2, a whole number + } + + @Test + public void testInterpolateLower() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.49, 3.0); // position = 1.96 is fractional, so the value at position ceil(1.96) = 2 is returned + } + + @Test + public void testInterpolateHigher() throws Exception { + Long[] items = new Long[] { 1L, 2L, 3L, 4L, 5L }; + checkPercentile(items, 0.51, 4.0); // position = 2.04 is fractional, so the value at position ceil(2.04) = 3 is returned + } + + @Test + public void testSingleItem50() throws Exception { + Long[] items = new Long[] { 1L }; + checkPercentile(items, 0.5, 1); + } + + @Test + public void testSingleItem100() throws Exception { + Long[] items = new Long[] { 1L }; + checkPercentile(items, 1, 1); + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_disc(.50) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostresRefExample() throws Exception { + Long[] items = new Long[] { 54L, 35L, 15L, 15L, 76L, 87L, 78L }; + 
checkPercentile(items, 0.5, 54); // position = 6 * 0.5 = 3 -> fourth value in key order + } + + /* + * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT * + * INTO table percentile_src FROM vals; select percentile_disc(.72) within group (order by k) as + * perc from percentile_src; + */ + @Test + public void testPostresRefExample2() throws Exception { + Long[] items = new Long[] { 54L, 35L, 15L, 15L, 76L, 87L, 78L }; + checkPercentile(items, 0.72, 78); // position = 6 * 0.72 = 4.32 -> value at position 5 + } + + private void checkPercentile(Long[] items, double percentile, double expected) throws Exception { // fills a buffer by hand (no iterate/merge) and checks terminate() + PercentileContLongEvaluator eval = new GenericUDAFPercentileDisc.PercentileDiscLongEvaluator(); + + PercentileAgg agg = new PercentileAgg(); + + agg.percentiles = new ArrayList(); + agg.percentiles.add(new DoubleWritable(percentile)); + + for (int i = 0; i < items.length; i++) { + eval.increment(agg, new LongWritable(items[i]), 1); + } + + DoubleWritable result = (DoubleWritable) eval.terminate(agg); + + Assert.assertEquals(expected, result.get(), 0.01); + eval.close(); + } +} diff --git ql/src/test/queries/clientpositive/udaf_percentile_cont.q ql/src/test/queries/clientpositive/udaf_percentile_cont.q new file mode 100644 index 0000000000..7b6a656601 --- /dev/null +++ ql/src/test/queries/clientpositive/udaf_percentile_cont.q @@ -0,0 +1,69 @@ +--! 
qt:dataset:src +DESCRIBE FUNCTION percentile_cont; +DESCRIBE FUNCTION EXTENDED percentile_cont; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +-- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +select percentile_cont(cast(key as bigint), 0.5) from src where false; diff --git ql/src/test/queries/clientpositive/udaf_percentile_disc.q ql/src/test/queries/clientpositive/udaf_percentile_disc.q new file mode 100644 index 
0000000000..dd92b45033 --- /dev/null +++ ql/src/test/queries/clientpositive/udaf_percentile_disc.q @@ -0,0 +1,69 @@ +--! qt:dataset:src +DESCRIBE FUNCTION percentile_disc; +DESCRIBE FUNCTION EXTENDED percentile_disc; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +-- SORT_QUERY_RESULTS + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +-- test null handling +SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + +select percentile_disc(cast(key as bigint), 0.5) from src where false; diff --git 
ql/src/test/results/clientpositive/udaf_percentile_cont.q.out ql/src/test/results/clientpositive/udaf_percentile_cont.q.out new file mode 100644 index 0000000000..086f80fd47 --- /dev/null +++ ql/src/test/results/clientpositive/udaf_percentile_cont.q.out @@ -0,0 +1,421 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 
266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 
373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 
479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS 
INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+#### A masked pattern was here #### +NULL diff --git ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out new file mode 100644 index 0000000000..7400d0bd7b --- /dev/null +++ ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out @@ -0,0 +1,842 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont +POSTHOOK: type: DESCFUNCTION +percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]). +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 
239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 
341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 
449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(CAST(substr(value, 5) AS INT), 0.0), + percentile_cont(CAST(substr(value, 5) AS INT), 0.5), + percentile_cont(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) 
DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from 
src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +PREHOOK: query: DESCRIBE FUNCTION percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_disc +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 'percentile_disc' +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 'percentile_disc' +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 
353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 
+46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + 
percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 4.5 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 117.0 119.0 +12 120.0 127.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 154.0 158.0 +16 160.0 166.5 169.0 +17 170.0 175.0 179.0 +18 180.0 186.5 189.0 +19 190.0 194.5 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 216.5 219.0 +22 221.0 224.0 229.0 +23 230.0 234.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 264.0 266.0 +27 272.0 275.0 278.0 +28 280.0 283.5 289.0 +29 291.0 297.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 324.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 376.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 42.5 47.0 +40 400.0 403.5 409.0 +41 411.0 415.5 419.0 +42 421.0 425.5 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 467.5 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 494.5 498.0 +5 51.0 54.0 58.0 +6 64.0 66.5 69.0 +7 70.0 73.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src 
+GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL diff --git ql/src/test/results/clientpositive/udaf_percentile_disc.q.out ql/src/test/results/clientpositive/udaf_percentile_disc.q.out new file 
mode 100644 index 0000000000..2792012b20 --- /dev/null +++ ql/src/test/results/clientpositive/udaf_percentile_disc.q.out @@ -0,0 +1,421 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile_disc +POSTHOOK: type: DESCFUNCTION +percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc +POSTHOOK: type: DESCFUNCTION +percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation. +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc +Function type:BUILTIN +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 
302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 
409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 
74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(CAST(substr(value, 5) AS INT), 0.0), + percentile_disc(CAST(substr(value, 5) AS INT), 0.5), + percentile_disc(CAST(substr(value, 5) AS INT), 1.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0.0 5.0 9.0 +1 10.0 15.0 19.0 +10 100.0 103.0 105.0 +11 111.0 118.0 119.0 +12 120.0 128.0 129.0 +13 131.0 137.0 138.0 +14 143.0 146.0 149.0 +15 150.0 155.0 158.0 +16 160.0 167.0 169.0 +17 170.0 175.0 179.0 +18 180.0 187.0 189.0 +19 190.0 195.0 199.0 +2 20.0 26.0 28.0 +20 200.0 205.0 209.0 +21 213.0 217.0 219.0 +22 221.0 224.0 229.0 +23 230.0 235.0 239.0 +24 241.0 244.0 249.0 +25 252.0 256.0 258.0 +26 260.0 265.0 266.0 +27 272.0 275.0 278.0 +28 280.0 284.0 289.0 +29 291.0 298.0 298.0 +3 30.0 35.0 37.0 +30 302.0 307.0 309.0 +31 310.0 316.0 318.0 +32 321.0 325.0 327.0 +33 331.0 333.0 339.0 +34 341.0 345.0 348.0 +35 351.0 353.0 356.0 +36 360.0 367.0 369.0 +37 373.0 377.0 379.0 +38 382.0 384.0 389.0 +39 392.0 396.0 399.0 +4 41.0 43.0 47.0 +40 400.0 404.0 409.0 +41 411.0 417.0 419.0 +42 421.0 427.0 429.0 +43 430.0 435.0 439.0 +44 443.0 446.0 449.0 +45 452.0 455.0 459.0 +46 460.0 468.0 469.0 +47 470.0 477.0 479.0 +48 480.0 484.0 489.0 +49 490.0 495.0 498.0 +5 51.0 54.0 58.0 +6 64.0 67.0 69.0 +7 70.0 74.0 78.0 +8 80.0 84.0 87.0 +9 90.0 95.0 98.0 +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(NULL, 0.0) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 NULL +1 NULL +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 NULL +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 NULL +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 NULL +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 1.0 +1 1.0 +10 NULL +11 NULL +12 NULL +13 NULL +14 NULL +15 NULL +16 NULL +17 NULL +18 NULL +19 NULL +2 1.0 +20 NULL +21 NULL +22 NULL +23 NULL +24 NULL +25 NULL +26 NULL +27 NULL +28 NULL +29 NULL +3 1.0 +30 NULL +31 NULL +32 NULL +33 NULL +34 NULL +35 NULL +36 NULL +37 NULL +38 NULL +39 NULL +4 1.0 +40 NULL +41 NULL +42 NULL +43 NULL +44 NULL +45 NULL +46 NULL +47 NULL +48 NULL +49 NULL +5 NULL +6 NULL +7 NULL +8 NULL +9 NULL +PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL