commit 1ce82a958b96b39fccdce14472cda06bbcc5a673
Author: Janaki Lahorani
Date:   Tue Apr 10 15:51:08 2018 -0700

    HIVE-19160: Fix statistics merge code to handle null values for decimal columns

    Change-Id: I76eb97e0e081b4750127bfb34288c077801d97dd

diff --git ql/src/test/queries/clientpositive/decimalTest.q ql/src/test/queries/clientpositive/decimalTest.q
new file mode 100644
index 0000000000000000000000000000000000000000..622d579867d9562a5310b6c5776f825062a19e4e
--- /dev/null
+++ ql/src/test/queries/clientpositive/decimalTest.q
@@ -0,0 +1,38 @@
+drop table if exists testDecimal;
+create table testDecimal
+(cId TINYINT,
+ cBigInt DECIMAL,
+ cInt DECIMAL,
+ cSmallInt DECIMAL,
+ cTinyint DECIMAL);
+
+insert into testDecimal values
+(1,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123);
+
+insert into testDecimal values
+(2,
+ 1,
+ 2,
+ 3,
+ 4);
+
+insert into testDecimal values
+(3,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123);
+
+insert into testDecimal values
+(4,
+ -1234567890123456789,
+ -1234567890,
+ -12345,
+ -123);
+
+select cId, cBigInt, cInt, cSmallInt, cTinyint from testDecimal order by cId;
+drop table if exists testDecimal;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/decimalTest.q.out ql/src/test/results/clientpositive/decimalTest.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..8894f5a0fd39a8201d229de70110e3ca15dae17f
--- /dev/null
+++ ql/src/test/results/clientpositive/decimalTest.q.out
@@ -0,0 +1,134 @@
+PREHOOK: query: drop table if exists testDecimal
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testDecimal
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testDecimal
+(cId TINYINT,
+ cBigInt DECIMAL,
+ cInt DECIMAL,
+ cSmallInt DECIMAL,
+ cTinyint DECIMAL)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testDecimal
+POSTHOOK: query: create table testDecimal
+(cId TINYINT,
+ cBigInt DECIMAL,
+ cInt DECIMAL,
+ cSmallInt DECIMAL,
+ cTinyint DECIMAL)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testDecimal
+PREHOOK: query: insert into testDecimal values
+(1,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdecimal
+POSTHOOK: query: insert into testDecimal values
+(1,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdecimal
+POSTHOOK: Lineage: testdecimal.cbigint SCRIPT []
+POSTHOOK: Lineage: testdecimal.cid SCRIPT []
+POSTHOOK: Lineage: testdecimal.cint SCRIPT []
+POSTHOOK: Lineage: testdecimal.csmallint SCRIPT []
+POSTHOOK: Lineage: testdecimal.ctinyint SCRIPT []
+PREHOOK: query: insert into testDecimal values
+(2,
+ 1,
+ 2,
+ 3,
+ 4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdecimal
+POSTHOOK: query: insert into testDecimal values
+(2,
+ 1,
+ 2,
+ 3,
+ 4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdecimal
+POSTHOOK: Lineage: testdecimal.cbigint SCRIPT []
+POSTHOOK: Lineage: testdecimal.cid SCRIPT []
+POSTHOOK: Lineage: testdecimal.cint SCRIPT []
+POSTHOOK: Lineage: testdecimal.csmallint SCRIPT []
+POSTHOOK: Lineage: testdecimal.ctinyint SCRIPT []
+PREHOOK: query: insert into testDecimal values
+(3,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdecimal
+POSTHOOK: query: insert into testDecimal values
+(3,
+ 1234567890123456789,
+ 1234567890,
+ 12345,
+ 123)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdecimal
+POSTHOOK: Lineage: testdecimal.cbigint SCRIPT []
+POSTHOOK: Lineage: testdecimal.cid SCRIPT []
+POSTHOOK: Lineage: testdecimal.cint SCRIPT []
+POSTHOOK: Lineage: testdecimal.csmallint SCRIPT []
+POSTHOOK: Lineage: testdecimal.ctinyint SCRIPT []
+PREHOOK: query: insert into testDecimal values
+(4,
+ -1234567890123456789,
+ -1234567890,
+ -12345,
+ -123)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testdecimal
+POSTHOOK: query: insert into testDecimal values
+(4,
+ -1234567890123456789,
+ -1234567890,
+ -12345,
+ -123)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testdecimal
+POSTHOOK: Lineage: testdecimal.cbigint SCRIPT []
+POSTHOOK: Lineage: testdecimal.cid SCRIPT []
+POSTHOOK: Lineage: testdecimal.cint SCRIPT []
+POSTHOOK: Lineage: testdecimal.csmallint SCRIPT []
+POSTHOOK: Lineage: testdecimal.ctinyint SCRIPT []
+PREHOOK: query: select cId, cBigInt, cInt, cSmallInt, cTinyint from testDecimal order by cId
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdecimal
+#### A masked pattern was here ####
+POSTHOOK: query: select cId, cBigInt, cInt, cSmallInt, cTinyint from testDecimal order by cId
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdecimal
+#### A masked pattern was here ####
+1 NULL 1234567890 12345 123
+2 1 2 3 4
+3 NULL 1234567890 12345 123
+4 NULL -1234567890 -12345 -123
+PREHOOK: query: drop table if exists testDecimal
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testdecimal
+PREHOOK: Output: default@testdecimal
+POSTHOOK: query: drop table if exists testDecimal
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testdecimal
+POSTHOOK: Output: default@testdecimal
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
index 01f3385d70e06085c5b755a5d1ee7cfb43f37fe9..07d39269c9012459d8091e6408953dadf3bf36f0 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java
@@ -26,19 +26,33 @@
 
 public class DecimalColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds the decimal statistics of {@code newColStats} into {@code aggregateColStats}.
+   * Only the aggregate's stats data is written; the new stats are read but never modified here.
+   * When the new stats have no low (or high) value, that bound of the aggregate is left untouched.
+   */
   @Override
   public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
     DecimalColumnStatsDataInspector aggregateData =
         (DecimalColumnStatsDataInspector) aggregateColStats.getStatsData().getDecimalStats();
     DecimalColumnStatsDataInspector newData =
         (DecimalColumnStatsDataInspector) newColStats.getStatsData().getDecimalStats();
-    Decimal lowValue = aggregateData.getLowValue() != null
-        && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData
-        .getLowValue() : newData.getLowValue();
-    aggregateData.setLowValue(lowValue);
-    Decimal highValue = aggregateData.getHighValue() != null
-        && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? aggregateData
-        .getHighValue() : newData.getHighValue();
-    aggregateData.setHighValue(highValue);
+
+    if (newData.getLowValue() != null) {
+      // The new stats carry a lower bound: keep the smaller of the two (or the new one if none yet).
+      Decimal lowValue = aggregateData.getLowValue() != null &&
+          aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ?
+          aggregateData.getLowValue() : newData.getLowValue();
+      aggregateData.setLowValue(lowValue);
+    }
+
+    if (newData.getHighValue() != null) {
+      // The new stats carry an upper bound: keep the larger of the two (or the new one if none yet).
+      Decimal highValue = aggregateData.getHighValue() != null &&
+          aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0 ?
+          aggregateData.getHighValue() : newData.getHighValue();
+      aggregateData.setHighValue(highValue);
+    }
+
     aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
     if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
       aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));