diff --git ql/src/test/queries/clientpositive/partitioned_table_stats.q ql/src/test/queries/clientpositive/partitioned_table_stats.q new file mode 100644 index 0000000000..73e46e89bb --- /dev/null +++ ql/src/test/queries/clientpositive/partitioned_table_stats.q @@ -0,0 +1,66 @@ +create table datatype_stats_n0( + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY) +PARTITIONED BY (t TINYINT); + +INSERT INTO datatype_stats_n0 values(3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin', 2); +INSERT INTO datatype_stats_n0 values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +DESC FORMATTED datatype_stats_n0 s; +DESC FORMATTED datatype_stats_n0 i; +DESC FORMATTED datatype_stats_n0 b; +DESC FORMATTED datatype_stats_n0 f; +DESC FORMATTED datatype_stats_n0 d; +DESC FORMATTED datatype_stats_n0 dem; +DESC FORMATTED datatype_stats_n0 ts; +DESC FORMATTED datatype_stats_n0 dt; +DESC FORMATTED datatype_stats_n0 str; +DESC FORMATTED datatype_stats_n0 v; +DESC FORMATTED datatype_stats_n0 c; +DESC FORMATTED datatype_stats_n0 bl; +DESC FORMATTED datatype_stats_n0 bin; +DESC FORMATTED datatype_stats_n0 t; + +INSERT INTO datatype_stats_n0 values(2, 44, 455, 45454.3, 454.6564, 2354, '2012-01-01 01:02:02', '2011-12-31', 'update_statistic', 'stat', 'hi', 'false', 'bi', 1); + +DESC FORMATTED datatype_stats_n0 s; +DESC FORMATTED datatype_stats_n0 i; +DESC FORMATTED datatype_stats_n0 b; +DESC FORMATTED datatype_stats_n0 f; +DESC FORMATTED datatype_stats_n0 d; +DESC FORMATTED datatype_stats_n0 dem; +DESC FORMATTED datatype_stats_n0 ts; +DESC FORMATTED datatype_stats_n0 dt; +DESC FORMATTED datatype_stats_n0 str; +DESC FORMATTED datatype_stats_n0 v; +DESC FORMATTED datatype_stats_n0 c; +DESC FORMATTED datatype_stats_n0 bl; +DESC FORMATTED datatype_stats_n0 bin; +DESC FORMATTED datatype_stats_n0 t; + +INSERT INTO datatype_stats_n0 values(4, 46, 457, 45454.5, 454.6566, 2356, '2012-01-01 01:02:04', '2012-01-02', 'update_statisticsss', 'statsss', 'hiveee', 'true', 'binnn', 4); + +DESC FORMATTED datatype_stats_n0 s; +DESC FORMATTED datatype_stats_n0 i; +DESC FORMATTED datatype_stats_n0 b; +DESC FORMATTED datatype_stats_n0 f; +DESC FORMATTED datatype_stats_n0 d; +DESC FORMATTED datatype_stats_n0 dem; +DESC FORMATTED datatype_stats_n0 ts; +DESC FORMATTED datatype_stats_n0 dt; +DESC FORMATTED datatype_stats_n0 str; +DESC FORMATTED datatype_stats_n0 v; +DESC FORMATTED datatype_stats_n0 c; +DESC FORMATTED datatype_stats_n0 bl; +DESC FORMATTED datatype_stats_n0 bin; +DESC FORMATTED datatype_stats_n0 t; diff --git ql/src/test/results/clientpositive/partitioned_table_stats.q.out ql/src/test/results/clientpositive/partitioned_table_stats.q.out new file mode 100644 index 0000000000..15be98512c --- /dev/null +++ ql/src/test/results/clientpositive/partitioned_table_stats.q.out @@ -0,0 +1,918 @@ +PREHOOK: query: create table datatype_stats_n0( + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY) +PARTITIONED BY (t TINYINT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@datatype_stats_n0 +POSTHOOK: query: create table datatype_stats_n0( + s SMALLINT, + i INT, + b BIGINT, + f FLOAT, + d DOUBLE, + dem DECIMAL, --default decimal (10,0) + ts TIMESTAMP, + dt DATE, + str STRING, + v VARCHAR(12), + c CHAR(5), + bl BOOLEAN, + bin BINARY) +PARTITIONED BY (t TINYINT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@datatype_stats_n0 +PREHOOK: query: INSERT INTO datatype_stats_n0 values(3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin', 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@datatype_stats_n0 +POSTHOOK: query: INSERT INTO datatype_stats_n0 values(3, 45, 456, 45454.4, 454.6565, 2355, '2012-01-01 01:02:03', '2012-01-01', 'update_statistics', 'stats', 'hive', 'true', 'bin', 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@datatype_stats_n0@t=2 +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).b SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).bin SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).bl SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).c SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).d SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).dem SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).dt SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).f SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).i SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).s SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).str SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).ts SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=2).v SCRIPT [] +PREHOOK: query: INSERT INTO datatype_stats_n0 values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@datatype_stats_n0 +POSTHOOK: query: INSERT INTO datatype_stats_n0 values(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@datatype_stats_n0@t=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).b EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).bin EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).bl EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).c EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).d EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).dem EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).dt EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).f EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).i EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).s EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).str EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).ts EXPRESSION [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=__HIVE_DEFAULT_PARTITION__).v EXPRESSION [] +PREHOOK: query: DESC FORMATTED datatype_stats_n0 s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name s +data_type smallint +min 3 +max 3 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"s\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name i +data_type int +min 45 +max 45 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"i\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name b +data_type bigint +min 456 +max 456 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"b\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name f +data_type float +min 45454.3984375 +max 45454.3984375 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"f\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name d +data_type double +min 454.6565 +max 454.6565 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"d\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dem +data_type decimal(10,0) +min 2355 +max 2355 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dem\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name ts +data_type timestamp +min 1325379723 +max 1325379723 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dt +data_type date +min 2012-01-01 +max 2012-01-01 +num_nulls 1 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dt\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name str +data_type string +min +max +num_nulls 1 +distinct_count 1 +avg_col_len 17.0 +max_col_len 17 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"str\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name v +data_type varchar(12) +min +max +num_nulls 1 +distinct_count 1 +avg_col_len 5.0 +max_col_len 5 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"v\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name c +data_type char(5) +min +max +num_nulls 1 +distinct_count 1 +avg_col_len 4.0 +max_col_len 4 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"c\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bl +data_type boolean +min +max +num_nulls 1 +distinct_count +avg_col_len +max_col_len +num_trues 1 +num_falses 0 +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bl\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bin +data_type binary +min +max +num_nulls 1 +distinct_count +avg_col_len 3.0 +max_col_len 3 +num_trues +num_falses +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bin\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name t +data_type tinyint +min 2 +max 2 +num_nulls 0 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector +comment +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"t\":\"true\"}} +PREHOOK: query: INSERT INTO datatype_stats_n0 values(2, 44, 455, 45454.3, 454.6564, 2354, '2012-01-01 01:02:02', '2011-12-31', 'update_statistic', 'stat', 'hi', 'false', 'bi', 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@datatype_stats_n0 +POSTHOOK: query: INSERT INTO datatype_stats_n0 values(2, 44, 455, 45454.3, 454.6564, 2354, '2012-01-01 01:02:02', '2011-12-31', 'update_statistic', 'stat', 'hi', 'false', 'bi', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@datatype_stats_n0@t=1 +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).b SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).bin SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).bl SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).c SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).d SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).dem SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).dt SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).f SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).i SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).s SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).str SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).ts SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=1).v SCRIPT [] +PREHOOK: query: DESC FORMATTED datatype_stats_n0 s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name s +data_type smallint +min 2 +max 3 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"s\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name i +data_type int +min 44 +max 45 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"i\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name b +data_type bigint +min 455 +max 456 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"b\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name f +data_type float +min 45454.30078125 +max 45454.3984375 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"f\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name d +data_type double +min 454.6564 +max 454.6565 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"d\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dem +data_type decimal(10,0) +min 2354 +max 2355 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dem\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name ts +data_type timestamp +min 1325379722 +max 1325379723 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dt +data_type date +min 2011-12-31 +max 2012-01-01 +num_nulls 1 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dt\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name str +data_type string +min +max +num_nulls 1 +distinct_count 2 +avg_col_len 17.0 +max_col_len 17 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"str\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name v +data_type varchar(12) +min +max +num_nulls 1 +distinct_count 2 +avg_col_len 5.0 +max_col_len 5 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"v\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name c +data_type char(5) +min +max +num_nulls 1 +distinct_count 2 +avg_col_len 4.0 +max_col_len 4 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"c\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bl +data_type boolean +min +max +num_nulls 1 +distinct_count +avg_col_len +max_col_len +num_trues 1 +num_falses 1 +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bl\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bin +data_type binary +min +max +num_nulls 1 +distinct_count +avg_col_len 3.0 +max_col_len 3 +num_trues +num_falses +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bin\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name t +data_type tinyint +min 1 +max 2 +num_nulls 0 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector +comment +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"t\":\"true\"}} +PREHOOK: query: INSERT INTO datatype_stats_n0 values(4, 46, 457, 45454.5, 454.6566, 2356, '2012-01-01 01:02:04', '2012-01-02', 'update_statisticsss', 'statsss', 'hiveee', 'true', 'binnn', 4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@datatype_stats_n0 +POSTHOOK: query: INSERT INTO datatype_stats_n0 values(4, 46, 457, 45454.5, 454.6566, 2356, '2012-01-01 01:02:04', '2012-01-02', 'update_statisticsss', 'statsss', 'hiveee', 'true', 'binnn', 4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@datatype_stats_n0@t=4 +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).b SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).bin SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).bl SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).c SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).d SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).dem SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).dt SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).f SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).i SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).s SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).str SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).ts SCRIPT [] +POSTHOOK: Lineage: datatype_stats_n0 PARTITION(t=4).v SCRIPT [] +PREHOOK: query: DESC FORMATTED datatype_stats_n0 s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name s +data_type smallint +min 2 +max 4 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"s\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name i +data_type int +min 44 +max 46 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"i\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name b +data_type bigint +min 455 +max 457 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"b\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 f +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 f +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name f +data_type float +min 45454.30078125 +max 45454.5 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"f\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name d +data_type double +min 454.6564 +max 454.6566 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"d\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dem +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dem +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dem +data_type decimal(10,0) +min 2354 +max 2356 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dem\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name ts +data_type timestamp +min 1325379722 +max 1325379724 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"ts\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 dt +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 dt +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name dt +data_type date +min 2011-12-31 +max 2012-01-02 +num_nulls 1 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"dt\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name str +data_type string +min +max +num_nulls 1 +distinct_count 3 +avg_col_len 19.0 +max_col_len 19 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"str\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 v +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 v +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name v +data_type varchar(12) +min +max +num_nulls 1 +distinct_count 3 +avg_col_len 7.0 +max_col_len 7 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"v\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name c +data_type char(5) +min +max +num_nulls 1 +distinct_count 3 +avg_col_len 5.0 +max_col_len 5 +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"c\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bl +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bl +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bl +data_type boolean +min +max +num_nulls 1 +distinct_count +avg_col_len +max_col_len +num_trues 2 +num_falses 1 +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bl\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 bin +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 bin +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name bin +data_type binary +min +max +num_nulls 1 +distinct_count +avg_col_len 5.0 +max_col_len 5 +num_trues +num_falses +bitVector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"bin\":\"true\"}} +PREHOOK: query: DESC FORMATTED datatype_stats_n0 t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@datatype_stats_n0 +POSTHOOK: query: DESC FORMATTED datatype_stats_n0 t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@datatype_stats_n0 +col_name t +data_type tinyint +min 1 +max 4 +num_nulls 0 +distinct_count 4 +avg_col_len +max_col_len +num_trues +num_falses +bitVector +comment +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"t\":\"true\"}} diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java index ddc1455e2b..281ddaa90f 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.merge.DateColumnStatsMerger; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -99,17 +100,19 @@ public ColumnStatisticsObj aggregate(List colStatsWit ColumnStatisticsObj cso = csp.getColStatsObj(); DateColumnStatsDataInspector newData = dateInspectorFromStats(cso); higherBound += newData.getNumDVs(); - densityAvgSum += (diff(newData.getHighValue(), newData.getLowValue())) - / newData.getNumDVs(); + if (newData.isSetLowValue() && newData.isSetHighValue()) { + densityAvgSum += (diff(newData.getHighValue(), newData.getLowValue())) / newData.getNumDVs(); + } if (ndvEstimator != null) { ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + DateColumnStatsMerger merger = new DateColumnStatsMerger(); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index cc18dabf4d..63bc3fdc5c 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.merge.DecimalColumnStatsMerger; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; @@ -102,26 +103,20 @@ public ColumnStatisticsObj aggregate(List colStatsWit DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(cso); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += newData.getNumDVs(); - densityAvgSum += (MetaStoreServerUtils.decimalToDouble(newData.getHighValue()) - MetaStoreServerUtils - .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); + if (newData.isSetLowValue() && newData.isSetHighValue()) { + densityAvgSum += (MetaStoreServerUtils.decimalToDouble(newData.getHighValue()) - MetaStoreServerUtils + .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); + } if (ndvEstimator != null) { ndvEstimator.mergeEstimators(newData.getNdvEstimator()); } if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - if (MetaStoreServerUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreServerUtils - .decimalToDouble(newData.getLowValue())) { - aggregateData.setLowValue(aggregateData.getLowValue()); - } else { - aggregateData.setLowValue(newData.getLowValue()); - } - if (MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreServerUtils - .decimalToDouble(newData.getHighValue())) { - aggregateData.setHighValue(aggregateData.getHighValue()); - } else { - aggregateData.setHighValue(newData.getHighValue()); - } + DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger(); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index 7c217fc50f..6d4e6472aa 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.merge.DoubleColumnStatsMerger; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,9 +108,10 @@ public ColumnStatisticsObj aggregate(List colStatsWit if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + DoubleColumnStatsMerger merger = new DoubleColumnStatsMerger(); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } @@ -156,7 +158,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit ColumnStatisticsObj cso = csp.getColStatsObj(); String partName = csp.getPartName(); DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); - if (useDensityFunctionForNDVEstimation) { + if (useDensityFunctionForNDVEstimation && newData.isSetLowValue() && newData.isSetHighValue()) { densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); } adjustedIndexMap.put(partName, (double) indexMap.get(partName)); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index 2b237e5816..ffde02455a 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.merge.LongColumnStatsMerger; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,9 +107,10 @@ public ColumnStatisticsObj aggregate(List colStatsWit if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + LongColumnStatsMerger merger = new LongColumnStatsMerger(); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index a2232b0965..c46e0a90e4 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -57,7 +57,7 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateColStats.getStatsData().setDateStats(aggregateData); } - private void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { + public void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { return; } @@ -75,7 +75,7 @@ private void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnS aggregateData.setLowValue(mergedLowValue); } - private void setHighValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { + public void setHighValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { return; } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java index a114188110..be8175f23d 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -24,8 +24,6 @@ import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; -import com.google.common.annotations.VisibleForTesting; - import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats; public class DecimalColumnStatsMerger extends ColumnStatsMerger { @@ -60,8 +58,7 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateColStats.getStatsData().setDecimalStats(aggregateData); } - @VisibleForTesting - void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { + public void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { return; } @@ -79,8 +76,7 @@ void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnSta aggregateData.setLowValue(mergedLowValue); } - @VisibleForTesting - void setHighValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { + public void setHighValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { return; } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java index a02f25b709..41f9095645 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -54,7 +54,7 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateColStats.getStatsData().setDoubleStats(aggregateData); } - private void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { + public void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { return; } @@ -64,13 +64,13 @@ private void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleCol aggregateData.setLowValue(lowValue); } - private void setHighValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { + public void setHighValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { return; } double highValue = Math.max( aggregateData.isSetHighValue() ? aggregateData.getHighValue() : Double.MIN_VALUE, newData.isSetHighValue() ? newData.getHighValue() : Double.MIN_VALUE); - aggregateData.setLowValue(highValue); + aggregateData.setHighValue(highValue); } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java index 67adbf1e98..dfc8421058 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -54,7 +54,7 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new aggregateColStats.getStatsData().setLongStats(aggregateData); } - private void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { + public void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { return; } @@ -64,7 +64,7 @@ private void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnS aggregateData.setLowValue(lowValue); } - private void setHighValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { + public void setHighValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { return; }