diff --git a/data/scripts/q_test_init.sql b/data/scripts/q_test_init.sql index 6147959..73a76f7 100644 --- a/data/scripts/q_test_init.sql +++ b/data/scripts/q_test_init.sql @@ -7,6 +7,8 @@ CREATE TABLE src (key STRING COMMENT 'default', value STRING COMMENT 'default') LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" INTO TABLE src; +ANALYZE TABLE src COMPUTE STATISTICS FOR COLUMNS key,value; + -- -- Table src1 -- @@ -16,6 +18,8 @@ CREATE TABLE src1 (key STRING COMMENT 'default', value STRING COMMENT 'default') LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1; +ANALYZE TABLE src1 COMPUTE STATISTICS FOR COLUMNS key,value; + -- -- Table src_json -- @@ -25,6 +29,7 @@ CREATE TABLE src_json (json STRING COMMENT 'default') STORED AS TEXTFILE; LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/json.txt" INTO TABLE src_json; +ANALYZE TABLE src_json COMPUTE STATISTICS FOR COLUMNS json; -- -- Table src_sequencefile @@ -35,6 +40,7 @@ CREATE TABLE src_sequencefile (key STRING COMMENT 'default', value STRING COMMEN LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.seq" INTO TABLE src_sequencefile; +ANALYZE TABLE src_sequencefile COMPUTE STATISTICS FOR COLUMNS key,value; -- -- Table src_thrift @@ -50,7 +56,6 @@ STORED AS SEQUENCEFILE; LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/complex.seq" INTO TABLE src_thrift; - -- -- Table srcbucket -- @@ -63,6 +68,7 @@ STORED AS TEXTFILE; LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket0.txt" INTO TABLE srcbucket; LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket1.txt" INTO TABLE srcbucket; +ANALYZE TABLE srcbucket COMPUTE STATISTICS FOR COLUMNS key,value; -- -- Table srcbucket2 @@ -78,6 +84,7 @@ LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket21.txt" INTO TABLE sr LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket22.txt" INTO TABLE srcbucket2; LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket23.txt" INTO TABLE srcbucket2; +ANALYZE TABLE srcbucket2 COMPUTE STATISTICS FOR COLUMNS key,value; -- -- Table srcpart @@ -100,6 +107,7 @@ OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12"); +ANALYZE TABLE srcpart PARTITION(ds, hr) COMPUTE STATISTICS FOR COLUMNS key,value; -- -- Table alltypesorc @@ -123,6 +131,7 @@ CREATE TABLE alltypesorc( LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/alltypesorc" OVERWRITE INTO TABLE alltypesorc; +ANALYZE TABLE alltypesorc COMPUTE STATISTICS FOR COLUMNS ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2; -- -- Table primitives @@ -214,4 +223,4 @@ DROP TABLE IF EXISTS dest4_sequencefile; CREATE TABLE dest4_sequencefile (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat'; \ No newline at end of file +OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat'; diff --git a/ql/src/test/queries/clientpositive/confirm_initial_tbl_stats.q b/ql/src/test/queries/clientpositive/confirm_initial_tbl_stats.q new file mode 100644 index 0000000..6f7bb53 --- /dev/null +++ b/ql/src/test/queries/clientpositive/confirm_initial_tbl_stats.q @@ -0,0 +1,43 @@ +describe extended src; + +describe formatted src.key; + +describe extended src1; + +describe formatted src1.value; + +describe extended src_json; + +describe formatted src_json.json; + +describe extended src_sequencefile; + +describe formatted src_sequencefile.value; + +describe extended srcbucket; + +describe formatted srcbucket.value; + +describe extended srcbucket2; + +describe formatted srcbucket2.value; + +describe extended srcpart; + +describe formatted srcpart.key PARTITION (ds="2008-04-09", hr="12"); + +describe extended alltypesorc; + +describe formatted alltypesorc.ctinyint; + +describe formatted alltypesorc.cfloat; + +describe formatted alltypesorc.ctimestamp1; + +describe formatted alltypesorc.cboolean2; + + + + + + diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out new file mode 100644 index 0000000..3ef6bc0 --- /dev/null +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: describe extended src +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: describe extended src +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +key string default +value string default + +#### A masked pattern was here #### +PREHOOK: query: describe formatted src.key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: describe formatted src.key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe extended src1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src1 +POSTHOOK: query: describe extended src1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src1 +key string default +value string default + +#### A masked pattern was here #### +PREHOOK: query: describe formatted src1.value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src1 +POSTHOOK: query: describe formatted src1.value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src1 +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 14 4.92 7 from deserializer +PREHOOK: query: describe extended src_json +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_json +POSTHOOK: query: describe extended src_json +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_json +json string default + +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_json.json +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_json +POSTHOOK: query: describe formatted src_json.json +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_json +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +json string 0 1 644.0 644 from deserializer +PREHOOK: query: describe extended src_sequencefile +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_sequencefile +POSTHOOK: query: describe extended src_sequencefile +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_sequencefile +key string default +value string default + +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_sequencefile.value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_sequencefile +POSTHOOK: query: describe formatted src_sequencefile.value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_sequencefile +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 214 6.812 7 from deserializer +PREHOOK: query: describe extended srcbucket +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcbucket +POSTHOOK: query: describe extended srcbucket +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcbucket +key int +value string + +#### A masked pattern was here #### +PREHOOK: query: describe formatted srcbucket.value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcbucket +POSTHOOK: query: describe formatted srcbucket.value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcbucket +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 234 6.802 7 from deserializer +PREHOOK: query: describe extended srcbucket2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcbucket2 +POSTHOOK: query: describe extended srcbucket2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcbucket2 +key int +value string + +#### A masked pattern was here #### +PREHOOK: query: describe formatted srcbucket2.value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcbucket2 +POSTHOOK: query: describe formatted srcbucket2.value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcbucket2 +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 214 6.812 7 from deserializer +PREHOOK: query: describe extended srcpart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcpart +POSTHOOK: query: describe extended srcpart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcpart +key string default +value string default +ds string +hr string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +#### A masked pattern was here #### +PREHOOK: query: describe formatted srcpart.key PARTITION (ds="2008-04-09", hr="12") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcpart +POSTHOOK: query: describe formatted srcpart.key PARTITION (ds="2008-04-09", hr="12") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcpart +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 205 2.812 3 from deserializer +PREHOOK: query: describe extended alltypesorc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: describe extended alltypesorc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alltypesorc +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +#### A masked pattern was here #### +PREHOOK: query: describe formatted alltypesorc.ctinyint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: describe formatted alltypesorc.ctinyint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alltypesorc +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +ctinyint tinyint -64 62 3115 94 from deserializer +PREHOOK: query: describe formatted alltypesorc.cfloat +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: describe formatted alltypesorc.cfloat +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alltypesorc +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cfloat float -64.0 79.5530014038086 3115 117 from deserializer +PREHOOK: query: describe formatted alltypesorc.ctimestamp1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: describe formatted alltypesorc.ctimestamp1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alltypesorc +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +ctimestamp1 timestamp -30 31 3115 31 from deserializer +PREHOOK: query: describe formatted alltypesorc.cboolean2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: describe formatted alltypesorc.cboolean2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alltypesorc +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cboolean2 boolean 3115 3983 5190 from deserializer