diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 149cbc1..af35322 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -18,12 +18,16 @@
 package org.apache.hadoop.hive.ql.stats;
 
-import com.google.common.base.Joiner;
-import com.google.common.collect.Lists;
-import com.google.common.math.LongMath;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -44,7 +48,6 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
@@ -92,16 +95,12 @@
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.AnnotationUtils;
 import org.apache.tez.mapreduce.hadoop.MRJobConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.math.LongMath;
 
 public class StatsUtils {
 
@@ -710,12 +709,28 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
       cs.setCountDistint(csd.getDecimalStats().getNumDVs());
       cs.setNumNulls(csd.getDecimalStats().getNumNulls());
       Decimal val = csd.getDecimalStats().getHighValue();
-      BigDecimal maxVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      val = csd.getDecimalStats().getLowValue();
-      BigDecimal minVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      cs.setRange(minVal, maxVal);
+      // The metastore may have no high/low value (e.g. an all-NULL column),
+      // and HiveDecimal.create() can return null; guard both before use.
+      BigDecimal maxVal = null;
+      BigDecimal minVal = null;
+      if (val != null && val.getUnscaled() != null) {
+        HiveDecimal hd = HiveDecimal.
+            create(new BigInteger(val.getUnscaled()), val.getScale());
+        if (hd != null) {
+          maxVal = hd.bigDecimalValue();
+        }
+      }
+      val = csd.getDecimalStats().getLowValue();
+      if (val != null && val.getUnscaled() != null) {
+        HiveDecimal hd = HiveDecimal.
+            create(new BigInteger(val.getUnscaled()), val.getScale());
+        if (hd != null) {
+          minVal = hd.bigDecimalValue();
+        }
+      }
+      if (minVal != null && maxVal != null) {
+        cs.setRange(minVal, maxVal);
+      }
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
     } else {
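A note on the StatsUtils hunk above: the pre-patch code chained create(...).bigDecimalValue() on both the high and the low value with no null checks, yet both steps can legitimately yield null. The metastore leaves a decimal column's highValue/lowValue unset when the column contains only NULLs, and HiveDecimal.create() returns null when the unscaled value cannot be represented within HiveDecimal's maximum precision. Below is a minimal standalone sketch of the guarded conversion; DecimalRangeSketch and toBigDecimal are illustrative names, not part of the patch.

    import java.math.BigDecimal;
    import java.math.BigInteger;

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.metastore.api.Decimal;

    public class DecimalRangeSketch {
      // Convert a possibly-absent Thrift Decimal into a BigDecimal, or null.
      static BigDecimal toBigDecimal(Decimal d) {
        if (d == null || d.getUnscaled() == null) {
          return null;  // stat was never recorded, e.g. an all-NULL column
        }
        HiveDecimal hd = HiveDecimal.create(new BigInteger(d.getUnscaled()), d.getScale());
        return hd == null ? null : hd.bigDecimalValue();  // create() returns null on overflow
      }

      public static void main(String[] args) {
        System.out.println(toBigDecimal(null));  // prints "null" rather than throwing NPE
      }
    }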
diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
new file mode 100644
index 0000000..2370e7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/decimal_stats.q
@@ -0,0 +1,16 @@
+set hive.stats.fetch.column.stats=true;
+drop table if exists decimal_1;
+
+create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
+
+desc decimal_1;
+
+insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
+
+analyze table decimal_1 compute statistics for columns;
+
+desc formatted decimal_1 v;
+
+explain select * from decimal_1 order by 1 limit 100;
+drop table decimal_1;
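The test is built around column v: every row of src inserts NULL into it, so the stats written by the analyze statement carry counts but no high/low value, which is precisely the input that used to crash getColStatistics(). A sketch of the stats object that reaches it in that case, assuming the usual Thrift-generated constructors and union setters:

    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;

    public class AllNullDecimalStats {
      public static void main(String[] args) {
        // Counts are present (see num_nulls/distinct_count in the q.out below)...
        DecimalColumnStatsData d = new DecimalColumnStatsData();
        d.setNumNulls(500);  // one NULL per row of src
        d.setNumDVs(1);
        // ...but highValue/lowValue are optional Thrift fields and stay unset,
        // so getHighValue()/getLowValue() return null downstream.
        ColumnStatisticsData csd = new ColumnStatisticsData();
        csd.setDecimalStats(d);
        System.out.println(csd.getDecimalStats().getHighValue());  // null
      }
    }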
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
new file mode 100644
index 0000000..dabf7f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -0,0 +1,106 @@
+PREHOOK: query: drop table if exists decimal_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists decimal_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_1
+PREHOOK: query: desc decimal_1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc decimal_1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+t                   	decimal(4,2)        	                    
+u                   	decimal(5,0)        	                    
+v                   	decimal(10,0)       	                    
+PREHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: insert overwrite table decimal_1
+  select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@decimal_1
+POSTHOOK: Lineage: decimal_1.t EXPRESSION []
+POSTHOOK: Lineage: decimal_1.u EXPRESSION []
+POSTHOOK: Lineage: decimal_1.v EXPRESSION []
+PREHOOK: query: analyze table decimal_1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table decimal_1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted decimal_1 v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc formatted decimal_1 v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+# col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment
+	 	 	 	 	 	 	 	 	 	 
+v                   	decimal(10,0)       	                    	                    	500                 	1                   	                    	                    	                    	                    	from deserializer
+PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: decimal_1
+            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: 1 (type: int)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table decimal_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_1
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: drop table decimal_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_1
+POSTHOOK: Output: default@decimal_1
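Two sanity checks on the golden file: the desc formatted output records 500 nulls and no min/max for v, and the explain output still reaches Column stats: COMPLETE on every operator, meaning stats annotation now survives the absent range instead of throwing. The size estimates are also self-consistent: 112000 bytes over 500 rows is 224 bytes per row, and the Limit scales that to 100 * 224 = 22400 bytes. To verify or regenerate the golden file locally, the usual qtest invocation (run from itests/qtest; exact flags may vary by branch) is mvn test -Dtest=TestCliDriver -Dqfile=decimal_stats.q -Dtest.output.overwrite=true.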