diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java index 286f24c..2b91b8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java @@ -235,16 +235,15 @@ public class UDAFPercentile extends UDAF { public boolean iterate(LongWritable o, List percentiles) { if (state.percentiles == null) { if(percentiles != null) { - for (int i = 0; i < percentiles.size(); i++) { - if (percentiles.get(i).get() < 0.0 || percentiles.get(i).get() > 1.0) { - throw new RuntimeException("Percentile value must be wihin the range of 0 to 1."); - } - } - - state.percentiles = new ArrayList(percentiles); - } + for (int i = 0; i < percentiles.size(); i++) { + if (percentiles.get(i).get() < 0.0 || percentiles.get(i).get() > 1.0) { + throw new RuntimeException("Percentile value must be wihin the range of 0 to 1."); + } + } + state.percentiles = new ArrayList(percentiles); + } else { - state.percentiles = new ArrayList(); + state.percentiles = new ArrayList(); } } if (o != null) { diff --git ql/src/test/queries/clientpositive/udf_percentile.q ql/src/test/queries/clientpositive/udf_percentile.q index d6063c8..936a514 100644 --- ql/src/test/queries/clientpositive/udf_percentile.q +++ ql/src/test/queries/clientpositive/udf_percentile.q @@ -70,3 +70,6 @@ FROM src GROUP BY CAST(key AS INT) DIV 10; select percentile(cast(key as bigint), 0.5) from src where false; + +-- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false; diff --git ql/src/test/results/clientpositive/udf_percentile.q.out ql/src/test/results/clientpositive/udf_percentile.q.out index f0d3509..5aaf923 100644 --- ql/src/test/results/clientpositive/udf_percentile.q.out +++ ql/src/test/results/clientpositive/udf_percentile.q.out @@ -1,3 +1,4 @@ +Preprocessor null PREHOOK: query: DESCRIBE FUNCTION percentile PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION percentile @@ -17,7 +18,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-18_084_6890848923874964805/-mr-10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -27,7 +28,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-18_084_6890848923874964805/-mr-10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -87,7 +88,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-43_065_5321252789457608463/-mr-10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -97,7 +98,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-43_065_5321252789457608463/-mr-10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -157,7 +158,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-52_368_5674905026941096088/-mr-10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -167,7 +168,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-23-52_368_5674905026941096088/-mr-10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -227,7 +228,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-00_607_6343229207169007881/-mr-10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -237,7 +238,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-00_607_6343229207169007881/-mr-10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -296,7 +297,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-13_194_161623296168522112/-mr-10000 POSTHOOK: query: -- test null handling SELECT CAST(key AS INT) DIV 10, percentile(NULL, 0.0), @@ -305,7 +306,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-13_194_161623296168522112/-mr-10000 0 NULL null 1 NULL null 2 NULL null @@ -364,7 +365,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-17_682_2910243438714645937/-mr-10000 POSTHOOK: query: -- test empty array handling SELECT CAST(key AS INT) DIV 10, percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), @@ -373,7 +374,7 @@ FROM src GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-17_682_2910243438714645937/-mr-10000 0 1.0 [1.0,1.0,1.0,1.0] 1 1.0 [1.0,1.0,1.0,1.0] 2 1.0 [1.0,1.0,1.0,1.0] @@ -427,9 +428,20 @@ POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_9767353996844314 PREHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000 +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-23_004_1504430038702698452/-mr-10000 POSTHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000 +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-23_004_1504430038702698452/-mr-10000 NULL +PREHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-26_477_1414118163857775329/-mr-10000 +POSTHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/vaggarw/hive_2011-08-01_16-24-26_477_1414118163857775329/-mr-10000 +null