diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 3e749eb..f78a4ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -42,6 +42,7 @@
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStore;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
@@ -1007,6 +1008,23 @@ public TableSpec(Hive db, HiveConf conf, String tableName, Map<String, String> p
      }
    }

+    public TableSpec(Hive db, HiveConf conf, String tableName) throws HiveException {
+      this.tableName = tableName;
+      this.tableHandle = db.getTable(tableName);
+      if (this.tableHandle.isPartitioned()) {
+        this.specType = SpecType.STATIC_PARTITION;
+        this.partitions = new ArrayList<>();
+        this.partitions.addAll(db.getAllPartitionsOf(this.tableHandle));
+        List<FieldSchema> partCols = this.tableHandle.getPartCols();
+        this.partSpec = new LinkedHashMap<>();
+        for (FieldSchema partCol : partCols) {
+          partSpec.put(partCol.getName(), null);
+        }
+      } else {
+        this.specType = SpecType.TABLE_ONLY;
+      }
+    }
+
    public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
        boolean allowPartialPartitionsSpec) throws SemanticException {
      assert (ast.getToken().getType() == HiveParser.TOK_TAB
@@ -1155,6 +1173,7 @@ public String toString() {
    private List<String> colName;
    private List<String> colType;
    private boolean tblLvl;
+    private boolean isCollectTableStats;


    public String getTableName() {
@@ -1188,6 +1207,15 @@ public void setTblLvl(boolean isTblLvl) {
    public void setColType(List<String> colType) {
      this.colType = colType;
    }
+
+    public boolean isCollectTableStats() {
+      return isCollectTableStats;
+    }
+
+    public void setCollectTableStats(boolean isCollectTableStats) {
+      this.isCollectTableStats = isCollectTableStats;
+    }
+
  }

  /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ff07b42..16c9b5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -68,6 +68,8 @@
  private List<String> colNames;
  private List<String> colType;
  private Table tbl;
+  // Table-level stats can be collected only in a whole-table scan, i.e. when no static partition spec is given.
+  private boolean isCollectTableStats = true;

  public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticException {
    super(queryState);
@@ -157,6 +159,7 @@ private StringBuilder genPartitionClause(Map<String, String> partSpec) throws Sem
    for (String partKey : partSpec.keySet()) {
      String value;
      if ((value = partSpec.get(partKey)) != null) {
+        isCollectTableStats = false;
        if (!predPresent) {
          predPresent = true;
        } else {
@@ -405,6 +408,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
    analyzeRewrite.setTblLvl(isTableLevel);
    analyzeRewrite.setColName(colNames);
    analyzeRewrite.setColType(colType);
+    analyzeRewrite.setCollectTableStats(isCollectTableStats);
    qbp.setAnalyzeRewrite(analyzeRewrite);
    initCtx(ctx);
    ctx.setExplainConfig(origCtx.getExplainConfig());
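For orientation, here is a minimal sketch of what the new single-argument TableSpec constructor yields. This is illustrative only, not part of the patch: it assumes an initialized HiveConf with a reachable metastore, and "spart" stands in for any table name. For a partitioned table the constructor enumerates every partition and maps each partition column to null, i.e. a whole-table spec with no static filter; for an unpartitioned table it degenerates to TABLE_ONLY.

import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;

// Sketch: exercise the new TableSpec(Hive, HiveConf, String) constructor.
public class TableSpecSketch {
  static void describe(HiveConf conf, String tableName) throws HiveException {
    Hive db = Hive.get(conf);
    TableSpec spec = new TableSpec(db, conf, tableName);
    if (spec.tableHandle.isPartitioned()) {
      // Whole-table spec: specType is STATIC_PARTITION, spec.partitions lists
      // every partition, and every partition column maps to null.
      System.out.println(spec.specType + ", " + spec.partitions.size() + " partitions");
      for (Map.Entry<String, String> e : spec.partSpec.entrySet()) {
        assert e.getValue() == null; // no static partition filter
      }
    } else {
      System.out.println(spec.specType); // TABLE_ONLY
    }
  }
}

This is the spec that setupStats() below hands to the stats machinery when column stats are collected over the whole table.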
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f053093..b4f3392 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10248,7 +10248,8 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
      RowResolver rwsch)
      throws SemanticException {
-    if (!qbp.isAnalyzeCommand()) {
+    // even when only column stats were requested, we still gather basic stats during the table scan
+    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
      tsDesc.setGatherStats(false);
    } else {
      if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
@@ -10272,6 +10273,16 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
      String tblName = tab.getTableName();
      TableSpec tblSpec = qbp.getTableSpec(alias);
+      if (tblSpec == null && qbp.getAnalyzeRewrite().isCollectTableStats()) {
+        // no TableSpec was set up for this alias, which means we are collecting
+        // table stats alongside column stats
+        try {
+          tblSpec = new TableSpec(db, conf, tblName);
+          tab.setTableSpec(tblSpec);
+        } catch (HiveException e) {
+          throw new SemanticException(e);
+        }
+      }
      Map<String, String> partSpec = tblSpec.getPartSpec();

      if (partSpec != null) {
@@ -10297,7 +10308,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
        if (partSpec == null) {
          throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
        }
-        List<Partition> partitions = qbp.getTableSpec().partitions;
+        List<Partition> partitions = tblSpec.partitions;
        if (partitions != null) {
          for (Partition partn : partitions) {
            // inputs.add(new ReadEntity(partn)); // is this needed at all?
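The rewritten guard is easier to read as a truth table: stats gathering on the TableScan stays on for plain ANALYZE commands and, new with this patch, for the column-stats rewrite. A trivial restatement (a sketch; the real check lives in setupStats() above):

// Sketch of the setupStats() guard: when does the TableScan gather basic stats?
public class GatherStatsSketch {
  static boolean gatherStats(boolean isAnalyzeCommand, boolean hasAnalyzeRewrite) {
    // Before the patch only isAnalyzeCommand mattered; the patch also keeps
    // gathering on when an ANALYZE ... FOR COLUMNS rewrite is present.
    return isAnalyzeCommand || hasAnalyzeRewrite;
  }

  public static void main(String[] args) {
    System.out.println(gatherStats(false, false)); // false: ordinary query
    System.out.println(gatherStats(true, false));  // true: ANALYZE ... COMPUTE STATISTICS
    System.out.println(gatherStats(false, true));  // true: ANALYZE ... FOR COLUMNS (new)
  }
}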
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 5f9ccc8..4e334b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -56,6 +57,7 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
@@ -66,7 +68,9 @@
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -300,6 +304,19 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializa
      if (isCStats) {
+        // If table stats can be collected as well (whole-table scan), hook a
+        // basic table stats task onto the same map-reduce job.
+        if (pCtx.getAnalyzeRewrite().isCollectTableStats()) {
+          Task<? extends Serializable> source = rootTasks.iterator().next();
+          source.getMapWork().iterator().next().setGatheringStats(true);
+          TableScanOperator tableScanOperator = (TableScanOperator) source.getMapWork().iterator()
+              .next().getAllRootOperators().iterator().next();
+          Task<? extends Serializable> statsTask = genTableStats(tableScanOperator, source, pCtx);
+          // Make the ColumnStatsTask depend on the stats task, because the stats
+          // task does not read HMS (it reads the table spec instead) when it
+          // writes stats. See StatsTask for details.
+          statsTask.addDependentTask(source.getChildTasks().iterator().next());
+        }
      } else {
        for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx
            .getColumnStatsAutoGatherContexts()) {
@@ -364,6 +381,21 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializa
    }
  }

+  private Task<? extends Serializable> genTableStats(TableScanOperator tableScan, Task<? extends Serializable> sourceTask,
+      ParseContext parseContext) {
+    // Note that we have to scan the table anyway to compute column stats, so
+    // noscan, ORC fast stats, etc. can be ignored here.
+    StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+    statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+    statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+    statsWork.setSourceTask(sourceTask);
+    statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+        HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
+    sourceTask.addDependentTask(statsTask);
+    return statsTask;
+  }
+
  private void patchUpAfterCTASorMaterializedView(final List<Task<? extends Serializable>> rootTasks,
      final HashSet<WriteEntity> outputs, Task<? extends Serializable> createTask) {
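The wiring above produces a three-node dependency chain: the map-reduce job that scans the table and aggregates compute_stats() results, then the basic-stats StatsTask, then the ColumnStatsTask that persists column stats. A toy model of that chain (hypothetical classes, not Hive's Task API) showing the intended execution order:

import java.util.ArrayList;
import java.util.List;

// Toy stand-in for Hive's task DAG; a scheduler runs a task only after its parents.
class ToyTask {
  final String name;
  final List<ToyTask> dependents = new ArrayList<>();
  ToyTask(String name) { this.name = name; }
  void addDependentTask(ToyTask t) { dependents.add(t); }
}

public class StatsDagSketch {
  public static void main(String[] args) {
    ToyTask source = new ToyTask("MapRedTask (scan + compute_stats aggregation)");
    ToyTask columnStatsTask = new ToyTask("ColumnStatsTask (persist column stats)");
    ToyTask statsTask = new ToyTask("StatsTask (persist basic table stats)");

    source.addDependentTask(columnStatsTask); // edge that already existed in the column-stats plan
    source.addDependentTask(statsTask);       // genTableStats(): sourceTask.addDependentTask(statsTask)
    statsTask.addDependentTask(columnStatsTask); // compile(): statsTask.addDependentTask(...)

    // Resulting order: source, then StatsTask, then ColumnStatsTask, so the
    // column stats are written after the freshly gathered basic stats.
    for (ToyTask d : source.dependents) {
      System.out.println(source.name + " -> " + d.name);
    }
    for (ToyTask d : statsTask.dependents) {
      System.out.println(statsTask.name + " -> " + d.name);
    }
  }
}

This ordering is what the test below verifies end to end: one scan populates both numRows/rawDataSize and the column statistics.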
diff --git a/ql/src/test/queries/clientpositive/column_table_stats.q b/ql/src/test/queries/clientpositive/column_table_stats.q
new file mode 100644
index 0000000..5f3da9d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats.q
@@ -0,0 +1,40 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
diff --git a/ql/src/test/results/clientpositive/column_table_stats.q.out b/ql/src/test/results/clientpositive/column_table_stats.q.out
new file mode 100644
index 0000000..f1ee4cc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/column_table_stats.q.out
@@ -0,0 +1,616 @@
+PREHOOK: query: DROP TABLE IF EXISTS s
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS s
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s
+POSTHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@s
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@s
+PREHOOK: query: desc formatted s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@s
+POSTHOOK: query: desc formatted s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@s
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	numFiles	1
+	numRows	0
+	rawDataSize	0
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table s compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table s compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Statistics Aggregation Key Prefix: default.s/
+            GatherStats: true
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  null sort order: 
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: s
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.s
+              numFiles 1
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct s { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.s
+                numFiles 1
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct s { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.s
+            name: default.s
+      Truncated Path -> Alias:
+        /s [s]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.s
+          Is Table Level Stats: true
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+      Stats Aggregation Key Prefix: default.s/
+
+PREHOOK: query: analyze table s compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s
+PREHOOK: Output: default@s
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table s compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s
+POSTHOOK: Output: default@s
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@s
+POSTHOOK: query: desc formatted s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@s
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: DROP TABLE IF EXISTS spart
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS spart
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@spart
+POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@spart
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11")
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@spart
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11")
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12")
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@spart
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12")
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string
+hr	string
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table spart compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table spart compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: spart
+            Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+            Statistics Aggregation Key Prefix: default.spart/
+            GatherStats: true
+            Select Operator
+              expressions: ds (type: string), hr (type: string), key (type: string), value (type: string)
+              outputColumnNames: ds, hr, key, value
+              Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  null sort order: aa
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col2 (type: struct), _col3 (type: struct)
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=11
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              bucket_count -1
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.spart
+              numFiles 1
+              numRows 0
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 0
+              serialization.ddl struct spart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.spart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct spart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.spart
+            name: default.spart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=12
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 12
+            properties:
+              bucket_count -1
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.spart
+              numFiles 1
+              numRows 0
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 0
+              serialization.ddl struct spart { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.spart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct spart { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.spart
+            name: default.spart
+      Truncated Path -> Alias:
+        /spart/ds=2008-04-08/hr=11 [spart]
+        /spart/ds=2008-04-08/hr=12 [spart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.spart
+          Is Table Level Stats: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+      Stats Aggregation Key Prefix: default.spart/
+
+PREHOOK: query: analyze table spart compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spart
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@spart
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table spart compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spart
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string
+hr	string
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string
+hr	string
+
+# Detailed Partition Information
+Partition Value:	[2008-04-08, 11]
+Database:	default
+Table:	spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name	data_type	comment
+
+key	string	default
+value	string	default
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string
+hr	string
+
+# Detailed Partition Information
+Partition Value:	[2008-04-08, 12]
+Database:	default
+Table:	spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1