diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index de0282f6ac..11ccff4458 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -277,7 +277,7 @@ public static boolean canRunAutogatherStats(Operator curr) { case VARCHAR: case BINARY: case DECIMAL: - // TODO: Support case DATE: + case DATE: break; default: return false; diff --git ql/src/test/queries/clientpositive/autoColumnStats_11.q ql/src/test/queries/clientpositive/autoColumnStats_11.q new file mode 100644 index 0000000000..3b3adf7cd4 --- /dev/null +++ ql/src/test/queries/clientpositive/autoColumnStats_11.q @@ -0,0 +1,25 @@ +-- Checking autogathering column stats for DATE cols +set hive.stats.column.autogather=true; + +create table acs_t11(a int,b int,d date); + +-- should produce compute_stats aggregations +explain insert into acs_t11 values(1,1,'2011-11-11'); + +insert into acs_t11 values(1,1,'2011-11-11'); +describe formatted acs_t11 d; + +insert into acs_t11 values(1,1,'2006-11-11'); +describe formatted acs_t11 d; + +insert into acs_t11 values(1,1,'2001-11-11'); +describe formatted acs_t11 d; + +insert into acs_t11 values(1,1,'2004-11-11'); +describe formatted acs_t11 d; + +explain analyze table acs_t11 compute statistics for columns; +analyze table acs_t11 compute statistics for columns; + +-- should be the same as before analyze +describe formatted acs_t11 d; diff --git ql/src/test/results/clientpositive/autoColumnStats_11.q.out ql/src/test/results/clientpositive/autoColumnStats_11.q.out new file mode 100644 index 0000000000..7c9f0d563f --- /dev/null +++ ql/src/test/results/clientpositive/autoColumnStats_11.q.out @@ -0,0 +1,332 @@ +PREHOOK: query: create table acs_t11(a int,b int,d date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acs_t11 +POSTHOOK: query: create table acs_t11(a int,b int,d date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acs_t11 +PREHOOK: query: explain insert into acs_t11 values(1,1,'2011-11-11') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into acs_t11 values(1,1,'2011-11-11') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,1,'2011-11-11')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: int), CAST( col3 AS DATE) (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: date) + outputColumnNames: a, b, d + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, d + Column Types: int, int, date + Table: default.acs_t11 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.acs_t11 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into acs_t11 values(1,1,'2011-11-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acs_t11 +POSTHOOK: query: insert into acs_t11 values(1,1,'2011-11-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acs_t11 +POSTHOOK: Lineage: acs_t11.a SCRIPT [] +POSTHOOK: Lineage: acs_t11.b SCRIPT [] +POSTHOOK: Lineage: acs_t11.d SCRIPT [] +PREHOOK: query: describe formatted acs_t11 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acs_t11 +POSTHOOK: query: describe formatted acs_t11 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acs_t11 +col_name d +data_type date +min 2011-11-11 +max 2011-11-11 +num_nulls 0 +distinct_count 1 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"d\":\"true\"}} +PREHOOK: query: insert into acs_t11 values(1,1,'2006-11-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acs_t11 +POSTHOOK: query: insert into acs_t11 values(1,1,'2006-11-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acs_t11 +POSTHOOK: Lineage: acs_t11.a SCRIPT [] +POSTHOOK: Lineage: acs_t11.b SCRIPT [] +POSTHOOK: Lineage: acs_t11.d SCRIPT [] +PREHOOK: query: describe formatted acs_t11 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acs_t11 +POSTHOOK: query: describe formatted acs_t11 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acs_t11 +col_name d +data_type date +min 2006-11-11 +max 2011-11-11 +num_nulls 0 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"d\":\"true\"}} +PREHOOK: query: insert into acs_t11 values(1,1,'2001-11-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acs_t11 +POSTHOOK: query: insert into acs_t11 values(1,1,'2001-11-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acs_t11 +POSTHOOK: Lineage: acs_t11.a SCRIPT [] +POSTHOOK: Lineage: acs_t11.b SCRIPT [] +POSTHOOK: Lineage: acs_t11.d SCRIPT [] +PREHOOK: query: describe formatted acs_t11 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acs_t11 +POSTHOOK: query: describe formatted acs_t11 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acs_t11 +col_name d +data_type date +min 2001-11-11 +max 2011-11-11 +num_nulls 0 +distinct_count 3 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"d\":\"true\"}} +PREHOOK: query: insert into acs_t11 values(1,1,'2004-11-11') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acs_t11 +POSTHOOK: query: insert into acs_t11 values(1,1,'2004-11-11') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acs_t11 +POSTHOOK: Lineage: acs_t11.a SCRIPT [] +POSTHOOK: Lineage: acs_t11.b SCRIPT [] +POSTHOOK: Lineage: acs_t11.d SCRIPT [] +PREHOOK: query: describe formatted acs_t11 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acs_t11 +POSTHOOK: query: describe formatted acs_t11 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acs_t11 +col_name d +data_type date +min 2001-11-11 +max 2011-11-11 +num_nulls 0 +distinct_count 4 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"d\":\"true\"}} +PREHOOK: query: explain analyze table acs_t11 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +POSTHOOK: query: explain analyze table acs_t11 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: acs_t11 + Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), d (type: date) + outputColumnNames: a, b, d + Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1416 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, d + Column Types: int, int, date + Table: default.acs_t11 + +PREHOOK: query: analyze table acs_t11 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@acs_t11 +PREHOOK: Output: default@acs_t11 +#### A masked pattern was here #### +POSTHOOK: query: analyze table acs_t11 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@acs_t11 +POSTHOOK: Output: default@acs_t11 +#### A masked pattern was here #### +PREHOOK: query: describe formatted acs_t11 d +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acs_t11 +POSTHOOK: query: describe formatted acs_t11 d +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acs_t11 +col_name d +data_type date +min 2001-11-11 +max 2011-11-11 +num_nulls 0 +distinct_count 4 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"d\":\"true\"}}