diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 0200506..ee46121 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -688,8 +688,13 @@ public Void visitCall(org.apache.calcite.rex.RexCall call) { public static ImmutableList getInputRef(List inputRefs, RelNode inputRel) { ImmutableList.Builder bldr = ImmutableList. builder(); - for (int i : inputRefs) { - bldr.add(new RexInputRef(i, inputRel.getRowType().getFieldList().get(i).getType())); + List exprs = inputRel.getChildExps(); + for (int index : inputRefs) { + if (exprs.get(index) instanceof RexLiteral) { + bldr.add(exprs.get(index)); + } else { + bldr.add(new RexInputRef(index, inputRel.getRowType().getFieldList().get(index).getType())); + } } return bldr.build(); } diff --git a/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q new file mode 100644 index 0000000..a3cf4a5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q @@ -0,0 +1,97 @@ +set hive.cbo.returnpath.hiveop=true; +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket; + +create table t1 (result double); +create table t2 (result double); +create table t3 (result double); +create table t4 (result double); +create table t5 (result double); +create table t6 (result double); +create table t7 (result array); +create table t8 (result array); +create table t9 (result array); +create table t10 (result array); +create table t11 (result array); +create table t12 (result array); + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.map.aggr=false; +-- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000); + +select * from t1; +select * from t2; +select * from t3; +select * from t4; +select * from t5; +select * from t6; +select * from t7; +select * from t8; +select * from t9; +select * from t10; +select * from t11; +select * from t12; + +set hive.map.aggr=true; +-- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000); + +select * from t1; +select * from t2; +select * from t3; +select * from t4; +select * from t5; +select * from t6; +select * from t7; +select * from t8; +select * from t9; +select * from t10; +select * from t11; +select * from t12; + +-- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket; +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket; + +-- with CBO +explain +select percentile_approx(key, 0.5) from bucket; +select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket; diff --git a/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out new file mode 100644 index 0000000..02d061a --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out @@ -0,0 +1,628 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: create table t1 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: create table t3 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: create table t4 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: create table t4 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: create table t5 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t5 +POSTHOOK: query: create table t5 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t5 +PREHOOK: query: create table t6 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t6 +POSTHOOK: query: create table t6 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t6 +PREHOOK: query: create table t7 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t7 +POSTHOOK: query: create table t7 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t7 +PREHOOK: query: create table t8 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t8 +POSTHOOK: query: create table t8 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t8 +PREHOOK: query: create table t9 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t9 +POSTHOOK: query: create table t9 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t9 +PREHOOK: query: create table t10 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t10 +POSTHOOK: query: create table t10 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t10 +PREHOOK: query: create table t11 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t11 +POSTHOOK: query: create table t11 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t11 +PREHOOK: query: create table t12 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t12 +POSTHOOK: query: create table t12 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t12 +PREHOOK: query: -- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t10 +PREHOOK: Output: default@t11 +PREHOOK: Output: default@t12 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t4 +PREHOOK: Output: default@t5 +PREHOOK: Output: default@t6 +PREHOOK: Output: default@t7 +PREHOOK: Output: default@t8 +PREHOOK: Output: default@t9 +POSTHOOK: query: -- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t10 +POSTHOOK: Output: default@t11 +POSTHOOK: Output: default@t12 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t5 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t7 +POSTHOOK: Output: default@t8 +POSTHOOK: Output: default@t9 +POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +PREHOOK: query: select * from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: select * from t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: select * from t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +#### A masked pattern was here #### +POSTHOOK: query: select * from t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t7 +PREHOOK: type: QUERY +PREHOOK: Input: default@t7 +#### A masked pattern was here #### +POSTHOOK: query: select * from t7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t7 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t8 +PREHOOK: type: QUERY +PREHOOK: Input: default@t8 +#### A masked pattern was here #### +POSTHOOK: query: select * from t8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t8 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t9 +PREHOOK: type: QUERY +PREHOOK: Input: default@t9 +#### A masked pattern was here #### +POSTHOOK: query: select * from t9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t9 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t10 +#### A masked pattern was here #### +POSTHOOK: query: select * from t10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t10 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t11 +PREHOOK: type: QUERY +PREHOOK: Input: default@t11 +#### A masked pattern was here #### +POSTHOOK: query: select * from t11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t11 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t12 +PREHOOK: type: QUERY +PREHOOK: Input: default@t12 +#### A masked pattern was here #### +POSTHOOK: query: select * from t12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t12 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: -- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t10 +PREHOOK: Output: default@t11 +PREHOOK: Output: default@t12 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t4 +PREHOOK: Output: default@t5 +PREHOOK: Output: default@t6 +PREHOOK: Output: default@t7 +PREHOOK: Output: default@t8 +PREHOOK: Output: default@t9 +POSTHOOK: query: -- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t10 +POSTHOOK: Output: default@t11 +POSTHOOK: Output: default@t12 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t5 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t7 +POSTHOOK: Output: default@t8 +POSTHOOK: Output: default@t9 +POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +PREHOOK: query: select * from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: select * from t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: select * from t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +#### A masked pattern was here #### +POSTHOOK: query: select * from t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t7 +PREHOOK: type: QUERY +PREHOOK: Input: default@t7 +#### A masked pattern was here #### +POSTHOOK: query: select * from t7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t7 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t8 +PREHOOK: type: QUERY +PREHOOK: Input: default@t8 +#### A masked pattern was here #### +POSTHOOK: query: select * from t8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t8 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t9 +PREHOOK: type: QUERY +PREHOOK: Input: default@t9 +#### A masked pattern was here #### +POSTHOOK: query: select * from t9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t9 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t10 +#### A masked pattern was here #### +POSTHOOK: query: select * from t10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t10 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t11 +PREHOOK: type: QUERY +PREHOOK: Input: default@t11 +#### A masked pattern was here #### +POSTHOOK: query: select * from t11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t11 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t12 +PREHOOK: type: QUERY +PREHOOK: Input: default@t12 +#### A masked pattern was here #### +POSTHOOK: query: select * from t12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t12 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: -- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket +PREHOOK: type: QUERY +POSTHOOK: query: -- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: bucket + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((key < 100.0)) THEN (NaN) ELSE (key) END (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: percentile_approx($f0, 0.5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +#### A masked pattern was here #### +POSTHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +#### A masked pattern was here #### +true +PREHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +PREHOOK: type: QUERY +POSTHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: bucket + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: percentile_approx($f0, 0.5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +#### A masked pattern was here #### +POSTHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +#### A masked pattern was here #### +true