diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
index 64f9c70f05..6295d7f26f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -43,6 +44,7 @@
 import org.apache.hadoop.hive.ql.plan.BasicStatsWork;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask;
 import org.apache.hadoop.mapred.InputFormat;
 
 /**
@@ -84,8 +86,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
 
     if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
       boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
-      if (OrcInputFormat.class.isAssignableFrom(inputFormat) ||
-          MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
+      if (BasicStatsNoJobTask.canUseFooterScan(table, inputFormat)) {
         // For ORC and Parquet, all the following statements are the same
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index 03cceace40..5426523207 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -22,6 +22,7 @@
 import java.util.Set;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -86,8 +87,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
       assert alias != null;
       TezWork tezWork = context.currentTask.getWork();
 
-      if (OrcInputFormat.class.isAssignableFrom(inputFormat) ||
-          MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
+      if ((OrcInputFormat.class.isAssignableFrom(inputFormat) && !AcidUtils.isFullAcidTable(table))
+          || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
         // For ORC & Parquet, all the following statements are the same
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 49709e596e..7a2a2c7a28 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -20,6 +20,7 @@
 
 import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
+
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -62,6 +63,7 @@
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.DefaultFetchFormatter;
 import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
@@ -381,7 +383,9 @@ private String extractTableFullName(StatsTask tsk) throws SemanticException {
       TableSpec tableSpec = new TableSpec(table, partitions);
       tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
 
-      if (inputFormat.equals(OrcInputFormat.class)) {
+      // Note: this should probably use BasicStatsNoJobTask.canUseFooterScan, but it doesn't check
+      // Parquet for some reason. I'm keeping the existing behavior for now.
+      if (inputFormat.equals(OrcInputFormat.class) && !AcidUtils.isTransactionalTable(table)) {
         // For ORC, there is no Tez Job for table stats.
         StatsWork columnStatsWork = new StatsWork(table, parseContext.getConf());
         columnStatsWork.setFooterScan();
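
Two different AcidUtils predicates appear in this patch: ProcessAnalyzeTable, SparkProcessAnalyzeTable, and the new canUseFooterScan gate on isFullAcidTable, while the TaskCompiler hunk above gates on the broader isTransactionalTable. A minimal sketch of the distinction, assuming the usual Hive 3 semantics of these predicates (the class and method names below are invented for illustration, not part of the patch):

    import org.apache.hadoop.hive.ql.io.AcidUtils;
    import org.apache.hadoop.hive.ql.metadata.Table;

    // Illustrative sketch, not Hive code.
    public class AcidGatingSketch {

      // What the analyze paths check: true only for tables whose delta files
      // can carry row-level updates and deletes.
      static boolean blocksFooterScanAnalyzePath(Table table) {
        return AcidUtils.isFullAcidTable(table);
      }

      // What the TaskCompiler hunk checks: true for any transactional table,
      // including insert-only (micromanaged) tables whose deltas only ever
      // append rows.
      static boolean blocksFooterScanTezPath(Table table) {
        return AcidUtils.isTransactionalTable(table);
      }
    }

Insert-only transactional tables never carry update or delete deltas, so a footer-based row count should still be correct for them; the Tez check above simply errs on the safe side.
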
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
index 28d4de7f7b..a61f88a22f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
@@ -22,6 +22,7 @@
 import java.util.Set;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -96,8 +97,8 @@ public Object process(Node nd, Stack<Node> stack,
       Preconditions.checkArgument(alias != null, "AssertionError: expected alias to be not null");
       SparkWork sparkWork = context.currentTask.getWork();
 
-      if (OrcInputFormat.class.isAssignableFrom(inputFormat) ||
-          MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
+      if ((OrcInputFormat.class.isAssignableFrom(inputFormat) && !AcidUtils.isFullAcidTable(table))
+          || MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
         // For ORC & Parquet, all the following statements are the same
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
index 3128ee8200..fd060983a9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
@@ -39,6 +39,8 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -71,6 +73,8 @@
  * faster to compute the table/partition statistics by reading the footer than scanning all the
  * rows. This task can be used for computing basic stats like numFiles, numRows, fileSize,
  * rawDataSize from ORC footer.
+ * However, this cannot be used for full ACID tables, since some of the files may contain updates
+ * and deletes to existing rows, so summing up the per-file row counts is invalid.
  **/
 public class BasicStatsNoJobTask implements IStatsProcessor {
 
@@ -86,6 +90,11 @@ public BasicStatsNoJobTask(HiveConf conf, BasicStatsNoJobWork work) {
     console = new LogHelper(LOG);
   }
 
+  public static boolean canUseFooterScan(
+      Table table, Class<? extends InputFormat> inputFormat) {
+    return (OrcInputFormat.class.isAssignableFrom(inputFormat) && !AcidUtils.isFullAcidTable(table))
+        || MapredParquetInputFormat.class.isAssignableFrom(inputFormat);
+  }
 
   @Override
   public void initialize(CompilationOpContext opContext) {
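
The new javadoc sentence above is the heart of the patch: on a full ACID table, per-file row counts from ORC footers cannot simply be summed, because delete_delta files report rows that were removed and update deltas re-add rows. A rough sketch of the footer-summing idea and where it goes wrong, assuming footers are read directly with the ORC reader API (FooterScanSketch and its crude file filter are hypothetical; OrcFile and Reader are real Hive classes):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;

    // Illustrative sketch, not Hive code.
    public class FooterScanSketch {

      // Sum ORC footer row counts for every data file under a table directory,
      // the way a footer-only stats pass would.
      static long sumFooterRowCounts(Configuration conf, Path tableDir) throws IOException {
        long rows = 0;
        FileSystem fs = tableDir.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(tableDir, true);
        while (files.hasNext()) {
          LocatedFileStatus f = files.next();
          // Crude filter for this sketch; ACID data files are bucket_N files
          // under base_*/delta_*/delete_delta_* directories.
          if (!f.isFile() || f.getPath().getName().startsWith(".")) {
            continue;
          }
          Reader reader = OrcFile.createReader(f.getPath(), OrcFile.readerOptions(conf));
          // For a non-ACID ORC file every footer row is a live row, so the sum
          // is exact. A file under delete_delta_* instead reports the number of
          // DELETED rows, and delta files may hold updates (delete + re-insert),
          // so adding them inflates numRows -- which is why canUseFooterScan
          // rejects full ACID tables.
          rows += reader.getNumberOfRows();
        }
        return rows;
      }
    }

canUseFooterScan therefore keeps the footer fast path for plain ORC, insert-only transactional ORC, and Parquet tables, while full ACID tables fall back to a stats task that actually scans the rows.
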
diff --git ql/src/test/queries/clientpositive/acid_no_buckets.q ql/src/test/queries/clientpositive/acid_no_buckets.q
index bcf9e0634b..552010a63b 100644
--- ql/src/test/queries/clientpositive/acid_no_buckets.q
+++ ql/src/test/queries/clientpositive/acid_no_buckets.q
@@ -28,8 +28,16 @@ select ds, hr, key, value from srcpart_acid where value like '%updated' order by
 insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003');
 select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer);
 
+describe formatted srcpart_acid;
+describe formatted srcpart_acid key;
+
 analyze table srcpart_acid PARTITION(ds, hr) compute statistics;
 analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns;
+
+-- make sure the stats stay the same after analyze (insert and update above also update stats)
+describe formatted srcpart_acid;
+describe formatted srcpart_acid key;
+
 explain delete from srcpart_acid where key in( '1001', '213', '43');
 --delete some rows from initial load, some that were updated and some that were inserted
 delete from srcpart_acid where key in( '1001', '213', '43');
diff --git ql/src/test/results/clientpositive/acid_table_stats.q.out ql/src/test/results/clientpositive/acid_table_stats.q.out
index 841a5a42ae..2fe4f97630 100644
--- ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -182,11 +182,13 @@ POSTHOOK: Input: default@acid@ds=2008-04-08
 PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
 PREHOOK: Output: default@acid
 PREHOOK: Output: default@acid@ds=2008-04-08
 POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
 POSTHOOK: Output: default@acid
 POSTHOOK: Output: default@acid@ds=2008-04-08
 PREHOOK: query: desc formatted acid partition(ds='2008-04-08')
@@ -209,10 +211,9 @@ Database:           	default
 Table:              	acid
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	numFiles            	2
 	numRows             	1000
-	rawDataSize         	208000
+	rawDataSize         	0
 	totalSize           	4063
 #### A masked pattern was here ####
 
@@ -260,10 +261,10 @@ Database:           	default
 Table:              	acid
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	2
 	numRows             	1000
-	rawDataSize         	208000
+	rawDataSize         	0
 	totalSize           	4063
 #### A masked pattern was here ####
 
@@ -303,9 +304,9 @@ STAGE PLANS:
           TableScan
             alias: acid
             filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 mode: hash
@@ -390,7 +391,7 @@ Table:              	acid
 Partition Parameters:
 	numFiles            	4
 	numRows             	3000
-	rawDataSize         	208000
+	rawDataSize         	0
 	totalSize           	8118
 #### A masked pattern was here ####
 
@@ -407,11 +408,13 @@ Storage Desc Params:
 PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
 PREHOOK: Output: default@acid
 PREHOOK: Output: default@acid@ds=2008-04-08
 POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
 POSTHOOK: Output: default@acid
 POSTHOOK: Output: default@acid@ds=2008-04-08
 PREHOOK: query: desc formatted acid partition(ds='2008-04-08')
@@ -434,10 +437,9 @@ Database:           	default
 Table:              	acid
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	numFiles            	4
 	numRows             	2000
-	rawDataSize         	416000
+	rawDataSize         	0
 	totalSize           	8118
 #### A masked pattern was here ####
 
@@ -466,9 +468,9 @@ STAGE PLANS:
           TableScan
             alias: acid
             filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 81180 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2000 Data size: 81180 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 mode: hash
@@ -539,11 +541,11 @@ STAGE PLANS:
           TableScan
             alias: acid
             filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 81180 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string)
               outputColumnNames: key
-              Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2000 Data size: 81180 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: max(key)
                 mode: hash
diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 36a6a5d5d1..9762de617c 100644
--- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -58,6 +58,10 @@ POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11
 PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acid
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acid
 PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12
@@ -66,6 +70,10 @@ PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acid
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acid
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12
@@ -239,9 +247,74 @@ POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12
 2008-04-08	11	1001	val1001
 2008-04-08	11	1002	val1002
 2008-04-08	11	1003	val1003
+PREHOOK: query: describe formatted srcpart_acid
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart_acid
+POSTHOOK: query: describe formatted srcpart_acid
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart_acid
+# col_name            	data_type           	comment
+key                 	string
+value               	string
+
+# Partition Information
+# col_name            	data_type           	comment
+ds                  	string
+hr                  	string
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	bucketing_version   	2
+	numFiles            	9
+	numPartitions       	4
+	numRows             	2003
+	rawDataSize         	0
+	totalSize           	17988
+	transactional       	true
+	transactional_properties	default
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:        	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:       	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:         	No
+Num Buckets:        	-1
+Bucket Columns:     	[]
+Sort Columns:       	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: describe formatted srcpart_acid key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart_acid
+POSTHOOK: query: describe formatted srcpart_acid key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart_acid
+col_name            	key
+data_type           	string
+min
+max
+num_nulls           	0
+distinct_count      	316
+avg_col_len         	2.812
+max_col_len         	3
+num_trues
+num_falses
+bitVector           	HL
+comment             	from deserializer
+COLUMN_STATS_ACCURATE	{}
 PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acid
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acid
 PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12
@@ -250,6 +323,10 @@ PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acid
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acid
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12
@@ -281,6 +358,67 @@ POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11
 POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
+PREHOOK: query: describe formatted srcpart_acid
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart_acid
+POSTHOOK: query: describe formatted srcpart_acid
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart_acid
+# col_name            	data_type           	comment
+key                 	string
+value               	string
+
+# Partition Information
+# col_name            	data_type           	comment
+ds                  	string
+hr                  	string
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	bucketing_version   	2
+	numFiles            	9
+	numPartitions       	4
+	numRows             	2003
+	rawDataSize         	0
+	totalSize           	17988
+	transactional       	true
+	transactional_properties	default
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:        	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:       	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:         	No
+Num Buckets:        	-1
+Bucket Columns:     	[]
+Sort Columns:       	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: describe formatted srcpart_acid key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart_acid
+POSTHOOK: query: describe formatted srcpart_acid key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart_acid
+col_name            	key
+data_type           	string
+min
+max
+num_nulls           	0
+distinct_count      	316
+avg_col_len         	2.812
+max_col_len         	3
+num_trues
+num_falses
+bitVector           	HL
+comment             	from deserializer
+COLUMN_STATS_ACCURATE	{}
 PREHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43')
 PREHOOK: type: QUERY
 POSTHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43')
@@ -304,19 +442,19 @@ STAGE PLANS:
                 TableScan
                   alias: srcpart_acid
                   filterExpr: (key) IN ('1001', '213', '43') (type: boolean)
-                  Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 2003 Data size: 911365 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: (key) IN ('1001', '213', '43') (type: boolean)
-                    Statistics: Num rows: 101 Data size: 45955 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 100 Data size: 45500 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                      Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: +
                         Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                        Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                        Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
@@ -326,10 +464,10 @@
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -561,6 +699,10 @@ POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11
 PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidb
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidb
 PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12
@@ -569,6 +711,10 @@ PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidb
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidb
 POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12
@@ -745,6 +891,10 @@ POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12
 PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidb
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidb
 PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12
@@ -753,6 +903,10 @@ PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidb
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidb
 POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12
@@ -807,19 +961,19 @@ STAGE PLANS:
                 TableScan
                   alias: srcpart_acidb
                   filterExpr: (key) IN ('1001', '213', '43') (type: boolean)
-                  Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 2003 Data size: 911365 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: (key) IN ('1001', '213', '43') (type: boolean)
-                    Statistics: Num rows: 101 Data size: 45955 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 100 Data size: 45500 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                      Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                       Reduce Output Operator
                         key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: +
                         Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                        Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                        Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
@@ -829,10 +983,10 @@
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 101 Data size: 44844 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 100 Data size: 44400 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1064,6 +1218,10 @@ POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11
 PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidv
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidv
 PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12
@@ -1072,6 +1230,10 @@ PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidv
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidv
 POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12
@@ -1264,6 +1426,10 @@ POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12
 PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidv
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidv
 PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12
@@ -1272,6 +1438,10 @@ PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidv
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidv
 POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12
@@ -1850,6 +2020,10 @@ POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11
 PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidvb
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidvb
 PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12
@@ -1858,6 +2032,10 @@ PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidvb
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidvb
 POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12
@@ -2050,6 +2228,10 @@ POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12
 PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart_acidvb
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12
 PREHOOK: Output: default@srcpart_acidvb
 PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11
 PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12
@@ -2058,6 +2240,10 @@ PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12
 POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart_acidvb
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12
 POSTHOOK: Output: default@srcpart_acidvb
 POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12
diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
index eda3985d0a..bfa204ead3 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
@@ -265,11 +265,11 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MATERIALIZED_VIEW
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
 	bucketing_version   	2
 	numFiles            	2
 	numRows             	2
-	rawDataSize         	248
+	rawDataSize         	0
 	totalSize           	736
 	transactional       	true
 	transactional_properties	default
@@ -494,11 +494,11 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MATERIALIZED_VIEW
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
 	bucketing_version   	2
 	numFiles            	2
 	numRows             	2
-	rawDataSize         	248
+	rawDataSize         	0
 	totalSize           	736
 	transactional       	true
 	transactional_properties	default
@@ -948,7 +948,7 @@ Table Parameters:
 	bucketing_version   	2
 	numFiles            	3
 	numRows             	3
-	rawDataSize         	248
+	rawDataSize         	0
 	totalSize           	1508
 	transactional       	true
 	transactional_properties	default
diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
index 99832ff847..44eca6d499 100644
--- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
+++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
@@ -405,7 +405,7 @@ Table Parameters:
 	bucketing_version   	2
 	numFiles            	2
 	numRows             	5
-	rawDataSize         	348
+	rawDataSize         	0
 	totalSize           	1071
 	transactional       	true
 	transactional_properties	default