Index: ql/src/test/results/clientpositive/udf_percentile.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_percentile.q.out (revision 930958) +++ ql/src/test/results/clientpositive/udf_percentile.q.out (working copy) @@ -17,7 +17,7 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-29_731_4236783179618139554/10000 +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -27,7 +27,7 @@ GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-29_731_4236783179618139554/10000 +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -87,7 +87,7 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-34_361_7854657244563137261/10000 +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -97,7 +97,7 @@ GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: 
file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-34_361_7854657244563137261/10000 +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -157,7 +157,7 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-38_451_6456445188085972700/10000 +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -167,7 +167,7 @@ GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-38_451_6456445188085972700/10000 +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -227,7 +227,7 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-45_662_669810203047990628/10000 +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, percentile(CAST(substr(value, 5) AS INT), 0.0), percentile(CAST(substr(value, 5) AS INT), 0.5), @@ -237,7 +237,7 @@ GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: 
Input: default@src -POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-45_662_669810203047990628/10000 +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000 0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] 1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] 2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] @@ -296,7 +296,7 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-53_228_1913274328173734747/10000 +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000 POSTHOOK: query: -- test null handling SELECT CAST(key AS INT) DIV 10, percentile(NULL, 0.0), @@ -305,7 +305,7 @@ GROUP BY CAST(key AS INT) DIV 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-03-23_16-16-53_228_1913274328173734747/10000 +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000 0 NULL null 1 NULL null 2 NULL null @@ -356,3 +356,71 @@ 47 NULL null 48 NULL null 49 NULL null +PREHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000 +POSTHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 
array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000 +0 1.0 [1.0,1.0,1.0,1.0] +1 1.0 [1.0,1.0,1.0,1.0] +2 1.0 [1.0,1.0,1.0,1.0] +3 1.0 [1.0,1.0,1.0,1.0] +4 1.0 [1.0,1.0,1.0,1.0] +5 NULL null +6 NULL null +7 NULL null +8 NULL null +9 NULL null +10 NULL null +11 NULL null +12 NULL null +13 NULL null +14 NULL null +15 NULL null +16 NULL null +17 NULL null +18 NULL null +19 NULL null +20 NULL null +21 NULL null +22 NULL null +23 NULL null +24 NULL null +25 NULL null +26 NULL null +27 NULL null +28 NULL null +29 NULL null +30 NULL null +31 NULL null +32 NULL null +33 NULL null +34 NULL null +35 NULL null +36 NULL null +37 NULL null +38 NULL null +39 NULL null +40 NULL null +41 NULL null +42 NULL null +43 NULL null +44 NULL null +45 NULL null +46 NULL null +47 NULL null +48 NULL null +49 NULL null Index: ql/src/test/queries/clientpositive/udf_percentile.q =================================================================== --- ql/src/test/queries/clientpositive/udf_percentile.q (revision 930958) +++ ql/src/test/queries/clientpositive/udf_percentile.q (working copy) @@ -60,3 +60,11 @@ percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) FROM src GROUP BY CAST(key AS INT) DIV 10; + + +-- test empty array handling +SELECT CAST(key AS INT) DIV 10, + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), + percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (revision 930958) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (working copy) @@ -132,12 +132,17 @@ public void init() { if 
(state.counts != null) { + // We reuse the same hashmap to reduce new object allocation. + // This means counts can be empty when there is no input data. state.counts.clear(); } } public boolean iterate(LongWritable o, double percentile) { if (state.percentiles == null) { + if (percentile < 0.0 || percentile > 1.0) { + throw new RuntimeException("Percentile value must be within the range of 0 to 1."); + } state.percentiles = new ArrayList(1); state.percentiles.add(new DoubleWritable(percentile)); } @@ -167,7 +172,7 @@ public DoubleWritable terminate() { // No input data. - if (state.counts == null) { + if (state.counts == null || state.counts.size() == 0) { return null; } @@ -211,12 +216,19 @@ public void init() { if (state.counts != null) { + // We reuse the same hashmap to reduce new object allocation. + // This means counts can be empty when there is no input data. state.counts.clear(); } } public boolean iterate(LongWritable o, List percentiles) { if (state.percentiles == null) { + for (int i = 0; i < percentiles.size(); i++) { + if (percentiles.get(i).get() < 0.0 || percentiles.get(i).get() > 1.0) { + throw new RuntimeException("Percentile value must be within the range of 0 to 1."); + } + } state.percentiles = new ArrayList(percentiles); } if (o != null) { @@ -246,7 +258,7 @@ public List terminate() { // No input data - if (state.counts == null) { + if (state.counts == null || state.counts.size() == 0) { return null; }