diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index a0e2e67..3f2675e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -38,12 +38,14 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; @@ -151,6 +153,16 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf // 1. 
Collect GB Keys RelNode aggInputRel = aggRel.getInput(); + Map<Integer, ExprNodeConstantDesc> mapToConstant = new HashMap<>(); + if (aggInputRel instanceof HiveProject) { + SelectOperator sop = (SelectOperator) inputOpAf.inputs.get(0); + for (int pos = 0; pos < sop.getConf().getColList().size(); pos++) { + ExprNodeDesc desc = sop.getConf().getColList().get(pos); + if (desc instanceof ExprNodeConstantDesc) { + mapToConstant.put(pos, (ExprNodeConstantDesc) desc.clone()); + } + } + } ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias, aggInputRel.getRowType(), new HashSet<Integer>(), aggRel.getCluster().getTypeFactory()); @@ -158,7 +170,11 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf for (int i : aggRel.getGroupSet()) { RexInputRef iRef = new RexInputRef(i, aggInputRel.getRowType().getFieldList() .get(i).getType()); - tmpExprNodeDesc = iRef.accept(exprConv); + if (mapToConstant.containsKey(i)) { + tmpExprNodeDesc = mapToConstant.get(i); + } else { + tmpExprNodeDesc = iRef.accept(exprConv); + } gbInfo.gbKeys.add(tmpExprNodeDesc); gbInfo.gbKeyColNamesInInput.add(aggInputRel.getRowType().getFieldNames().get(i)); gbInfo.gbKeyTypes.add(tmpExprNodeDesc.getTypeInfo()); @@ -201,10 +217,15 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf List<String> argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel); ExprNodeDesc distinctExpr; for (int i = 0; i < argLst.size(); i++) { - if (!distinctRefs.contains(argLst.get(i))) { - distinctRefs.add(argLst.get(i)); - distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size()); - distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv); + int pos = argLst.get(i); + if (!distinctRefs.contains(pos)) { + distinctRefs.add(pos); + distParamInRefsToOutputPos.put(pos, gbInfo.distExprNodes.size()); + if (mapToConstant.containsKey(pos)) { + distinctExpr = mapToConstant.get(pos); + } else { + distinctExpr = HiveCalciteUtil.getExprNode(pos, aggInputRel, 
exprConv); + } gbInfo.distExprNodes.add(distinctExpr); gbInfo.distExprNames.add(argNames.get(i)); gbInfo.distExprTypes.add(distinctExpr.getTypeInfo()); @@ -222,6 +243,12 @@ private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf UDAFAttrs udafAttrs = new UDAFAttrs(); List<ExprNodeDesc> argExps = HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel, inputOpAf.tabAlias); + for (int index = 0; index < aggCall.getArgList().size(); index++) { + Integer pos = aggCall.getArgList().get(index); + if (mapToConstant.containsKey(pos)) { + argExps.set(index, mapToConstant.get(pos)); + } + } udafAttrs.udafParams.addAll(argExps); udafAttrs.udafName = aggCall.getAggregation().getName(); udafAttrs.isDistinctUDAF = aggCall.isDistinct(); diff --git a/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q new file mode 100644 index 0000000..a3cf4a5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q @@ -0,0 +1,97 @@ +set hive.cbo.returnpath.hiveop=true; +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket; +load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket; + +create table t1 (result double); +create table t2 (result double); +create table t3 (result double); +create table t4 (result double); +create table t5 (result double); +create table t6 (result double); +create table t7 (result array<double>); +create table t8 (result array<double>); +create table t9 (result 
array<double>); +create table t10 (result array<double>); +create table t11 (result array<double>); +create table t12 (result array<double>); + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.map.aggr=false; +-- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000); + +select * from t1; +select * from t2; +select * from t3; +select * from t4; +select * from t5; +select * from t6; +select * from t7; +select * from t8; +select * from t9; +select * from t10; +select * from t11; +select * from t12; + +set hive.map.aggr=true; +-- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) 
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000); + +select * from t1; +select * from t2; +select * from t3; +select * from t4; +select * from t5; +select * from t6; +select * from t7; +select * from t8; +select * from t9; +select * from t10; +select * from t11; +select * from t12; + +-- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket; +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket; + +-- with CBO +explain +select percentile_approx(key, 0.5) from bucket; +select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket; diff --git a/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out new file mode 100644 index 0000000..7e05e78 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out @@ -0,0 +1,636 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS 
TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx + +CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@bucket +PREHOOK: query: create table t1 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: create table t3 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: create table t4 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: create table t4 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: create table t5 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t5 +POSTHOOK: query: create table t5 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t5 +PREHOOK: query: create table t6 (result double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t6 +POSTHOOK: query: create table t6 (result double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t6 +PREHOOK: query: create table t7 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t7 +POSTHOOK: query: create table t7 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@t7 +PREHOOK: query: create table t8 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t8 +POSTHOOK: query: create table t8 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t8 +PREHOOK: query: create table t9 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t9 +POSTHOOK: query: create table t9 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t9 +PREHOOK: query: create table t10 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t10 +POSTHOOK: query: create table t10 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t10 +PREHOOK: query: create table t11 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t11 +POSTHOOK: query: create table t11 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t11 +PREHOOK: query: create table t12 (result array) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t12 +POSTHOOK: query: create table t12 (result array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t12 +PREHOOK: query: -- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT 
percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t10 +PREHOOK: Output: default@t11 +PREHOOK: Output: default@t12 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t4 +PREHOOK: Output: default@t5 +PREHOOK: Output: default@t6 +PREHOOK: Output: default@t7 +PREHOOK: Output: default@t8 +PREHOOK: Output: default@t9 +POSTHOOK: query: -- disable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS 
int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t10 +POSTHOOK: Output: default@t11 +POSTHOOK: Output: default@t12 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t5 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t7 +POSTHOOK: Output: default@t8 +POSTHOOK: Output: default@t9 +POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +PREHOOK: query: select * 
from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: select * from t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: select * from t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +#### A masked pattern was here #### +POSTHOOK: query: select * from t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t7 +PREHOOK: type: QUERY +PREHOOK: Input: default@t7 +#### A masked pattern was here #### +POSTHOOK: query: select * from t7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t7 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t8 +PREHOOK: type: QUERY +PREHOOK: Input: default@t8 +#### A masked pattern was here #### +POSTHOOK: query: select * from t8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t8 +#### A 
masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t9 +PREHOOK: type: QUERY +PREHOOK: Input: default@t9 +#### A masked pattern was here #### +POSTHOOK: query: select * from t9 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t9 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t10 +#### A masked pattern was here #### +POSTHOOK: query: select * from t10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t10 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t11 +PREHOOK: type: QUERY +PREHOOK: Input: default@t11 +#### A masked pattern was here #### +POSTHOOK: query: select * from t11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t11 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t12 +PREHOOK: type: QUERY +PREHOOK: Input: default@t12 +#### A masked pattern was here #### +POSTHOOK: query: select * from t12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t12 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: -- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert 
overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t10 +PREHOOK: Output: default@t11 +PREHOOK: Output: default@t12 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t4 +PREHOOK: Output: default@t5 +PREHOOK: Output: default@t6 +PREHOOK: Output: default@t7 +PREHOOK: Output: default@t8 +PREHOOK: Output: default@t9 +POSTHOOK: query: -- enable map-side aggregation +FROM bucket +insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5) +insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100) +insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000) + +insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5) +insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100) +insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000) + +insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98)) +insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000) + +insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98)) +insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100) +insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000) +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@bucket +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t10 +POSTHOOK: Output: default@t11 +POSTHOOK: Output: default@t12 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t5 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t7 +POSTHOOK: Output: default@t8 +POSTHOOK: Output: default@t9 +POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ] +PREHOOK: query: select * from t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t2 
+PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: select * from t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: select * from t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +254.08333333333334 +PREHOOK: query: select * from t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +#### A masked pattern was here #### +POSTHOOK: query: select * from t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +#### A masked pattern was here #### +255.5 +PREHOOK: query: select * from t7 +PREHOOK: type: QUERY +PREHOOK: Input: default@t7 +#### A masked pattern was here #### +POSTHOOK: query: select * from t7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t7 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t8 +PREHOOK: type: QUERY +PREHOOK: Input: default@t8 +#### A masked pattern was here #### +POSTHOOK: query: select * from t8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t8 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t9 +PREHOOK: type: QUERY +PREHOOK: Input: default@t9 +#### A masked pattern was here #### +POSTHOOK: query: select * from t9 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@t9 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t10 +#### A masked pattern was here #### +POSTHOOK: query: select * from t10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t10 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: select * from t11 +PREHOOK: type: QUERY +PREHOOK: Input: default@t11 +#### A masked pattern was here #### +POSTHOOK: query: select * from t11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t11 +#### A masked pattern was here #### +[23.355555555555558,254.08333333333334,477.0625,488.38271604938274] +PREHOOK: query: select * from t12 +PREHOOK: type: QUERY +PREHOOK: Input: default@t12 +#### A masked pattern was here #### +POSTHOOK: query: select * from t12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t12 +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: -- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket +PREHOOK: type: QUERY +POSTHOOK: query: -- NaN +explain +select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: bucket + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((key < 100.0)) THEN (NaN) ELSE (key) END (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: percentile_approx($f0, 0.5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: 
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f0 (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +#### A masked pattern was here #### +POSTHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +#### A masked pattern was here #### +true +PREHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +PREHOOK: type: QUERY +POSTHOOK: query: -- with CBO +explain +select percentile_approx(key, 0.5) from bucket +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: bucket + Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 726 Data size: 5812 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: percentile_approx($f0, 0.5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: array) + Reduce Operator Tree: + Group By Operator + aggregations: percentile_approx(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f0 (type: double) + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket +#### A masked pattern was here #### +POSTHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket +#### A masked pattern was here #### +true