diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 7febfd5..0c17246 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -318,6 +319,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } Table tbl = tsOp.getConf().getTableMetadata(); + if (AcidUtils.isAcidTable(tbl)) { + Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer."); + return null; + } List oneRow = new ArrayList(); Hive hive = Hive.get(pctx.getConf()); diff --git a/ql/src/test/queries/clientpositive/acid_table_stats.q b/ql/src/test/queries/clientpositive/acid_table_stats.q index 45da8d4..23d0df4 100644 --- a/ql/src/test/queries/clientpositive/acid_table_stats.q +++ b/ql/src/test/queries/clientpositive/acid_table_stats.q @@ -31,6 +31,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics for columns; desc formatted acid partition(ds='2008-04-08'); +set hive.compute.query.using.stats=false; +select count(*) from acid where ds='2008-04-08'; + +set hive.compute.query.using.stats=true; +explain select count(*) from acid where ds='2008-04-08'; +select count(*) from acid where ds='2008-04-08'; + insert into table acid partition(ds) select key,value,ds from srcpart; desc formatted acid partition(ds='2008-04-08'); @@ -39,6 +46,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics; desc formatted acid partition(ds='2008-04-08'); +set hive.compute.query.using.stats=true; +explain select count(*) from acid where ds='2008-04-08'; +select count(*) from acid where ds='2008-04-08'; + +analyze table acid partition(ds='2008-04-08') compute statistics for columns; +explain select max(key) from acid where ds='2008-04-08'; + drop table acid; CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC; diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out index f662a48..fcfecd0 100644 --- a/ql/src/test/results/clientpositive/acid_table_stats.q.out +++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -273,6 +273,74 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: select count(*) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid +PREHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid +POSTHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain select count(*) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid + Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid +PREHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid +POSTHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +1000 PREHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -381,6 +449,121 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: explain select count(*) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid + Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid +PREHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid +POSTHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +2000 +PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@acid +PREHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid +POSTHOOK: Input: default@acid@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: explain select max(key) from acid where ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(key) from acid where ds='2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid + Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: drop table acid PREHOOK: type: DROPTABLE PREHOOK: Input: default@acid