From 7c1b5d8599099904c8b7a09bd8b8215cb7f90527 Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Thu, 2 Mar 2017 18:56:14 -0800
Subject: [PATCH] HIVE-16098 : Describe table doesn't show stats for
 partitioned tables

---
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java   | 43 +++++++++++++++++--
 ql/src/test/queries/clientpositive/desc_stats.q   |  2 +
 .../test/results/clientpositive/desc_stats.q.out  | 50 ++++++++++++++++++++++
 3 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/desc_stats.q
 create mode 100644 ql/src/test/results/clientpositive/desc_stats.q.out

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index c11ba97..564cd2b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -36,6 +36,7 @@
 import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -69,6 +70,7 @@
 import org.apache.hadoop.hive.metastore.PartitionDropOptions;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.CompactionResponse;
@@ -142,6 +144,7 @@
 import org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbortTxnsDesc;
@@ -216,6 +219,7 @@
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveRoleGrant;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveV1Authorizer;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -3253,7 +3257,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
     DataOutputStream outStream = getOutputStream(descTbl.getResFile());
     try {
-      LOG.info("DDLTask: got data for " + tbl.getTableName());
+      LOG.debug("DDLTask: got data for " + tbl.getTableName());
 
       List<FieldSchema> cols = null;
       List<ColumnStatisticsObj> colStats = null;
 
@@ -3273,6 +3277,26 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
         if (!descTbl.isFormatted()) {
           cols.addAll(tbl.getPartCols());
         }
+
+        if (tbl.isPartitioned() && part == null) {
+          // No partition specified for a partitioned table; fetch all and aggregate.
+          Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
+          Collection<Partition> parts = db.getAllPartitionsOf(tbl);
+          for (String stat : StatsSetupConst.supportedStats) {
+            boolean state = true;
+            long statVal = 0L;
+            for (Partition partition : parts) {
+              Map<String, String> props = partition.getParameters();
+              state &= StatsSetupConst.areBasicStatsUptoDate(props);
+              if (props != null && props.get(stat) != null) {
+                statVal += Long.parseLong(props.get(stat));
+              }
+            }
+            StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(state));
+            tblProps.put(stat, String.valueOf(statVal));
+          }
+          tbl.setParameters(tblProps);
+        }
       } else {
         cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
         if (descTbl.isFormatted()) {
@@ -3283,7 +3307,20 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
           List<String> colNames = new ArrayList<String>();
           colNames.add(colName.toLowerCase());
           if (null == part) {
-            colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
+            if (tbl.isPartitioned()) {
+              List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
+              AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
+              colStats = aggrStats.getColStats();
+              Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
+              if (parts.size() == aggrStats.getPartsFound()) {
+                StatsSetupConst.setColumnStatsState(tblProps, colNames);
+              } else {
+                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
+              }
+              tbl.setParameters(tblProps);
+            } else {
+              colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
+            }
           } else {
             List<String> partitions = new ArrayList<String>();
             partitions.add(part.getName());
@@ -3305,7 +3342,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
           cols, descTbl.isFormatted(), descTbl.isExt(), descTbl.isPretty(), isOutputPadded, colStats,
           pkInfo, fkInfo);
 
-      LOG.info("DDLTask: written data for " + tbl.getTableName());
+      LOG.debug("DDLTask: written data for " + tbl.getTableName());
 
     } catch (SQLException e) {
       throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
diff --git a/ql/src/test/queries/clientpositive/desc_stats.q b/ql/src/test/queries/clientpositive/desc_stats.q
new file mode 100644
index 0000000..0f4766e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/desc_stats.q
@@ -0,0 +1,2 @@
+describe formatted srcpart;
+describe formatted srcpart key;
diff --git a/ql/src/test/results/clientpositive/desc_stats.q.out b/ql/src/test/results/clientpositive/desc_stats.q.out
new file mode 100644
index 0000000..731e56a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/desc_stats.q.out
@@ -0,0 +1,50 @@
+PREHOOK: query: describe formatted srcpart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart
+POSTHOOK: query: describe formatted srcpart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart
+# col_name            	data_type           	comment             
+
+key                 	string              	default             
+value               	string              	default             
+
+# Partition Information
+# col_name            	data_type           	comment             
+
+ds                  	string              	                    
+hr                  	string              	                    
+
+# Detailed Table Information
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles            	4                   
+	numRows             	2000                
+	rawDataSize         	21248               
+	totalSize           	23248               
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: describe formatted srcpart key
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart
+POSTHOOK: query: describe formatted srcpart key
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart
+# col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
+ 	 	 	 	 	 	 	 	 	 	 
+key                 	string              	                    	                    	0                   	205                 	2.812               	3                   	 	 	from deserializer   
-- 
2.10.1 (Apple Git-78)
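
As a reading aid for the new DDLTask block, here is a minimal, self-contained sketch of the basic-stats rollup it performs, assuming plain string maps in place of Hive's Partition parameter maps. The class name, the stat-key list, and the sample values are illustrative only; the real key list lives in StatsSetupConst.supportedStats, and the sample numbers are chosen to reproduce the srcpart totals in the expected q.out output above.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionStatsRollup {

  // Stat keys assumed for illustration; the patched code iterates
  // StatsSetupConst.supportedStats instead.
  private static final List<String> SUPPORTED_STATS =
      Arrays.asList("numFiles", "numRows", "rawDataSize", "totalSize");

  // Sums each supported stat across all partition parameter maps. A missing
  // map or a missing stat key contributes nothing to the total, mirroring
  // the null guard in the patched loop.
  public static Map<String, Long> rollup(List<Map<String, String>> partitionParams) {
    Map<String, Long> totals = new HashMap<>();
    for (String stat : SUPPORTED_STATS) {
      long statVal = 0L;
      for (Map<String, String> props : partitionParams) {
        if (props != null && props.get(stat) != null) {
          statVal += Long.parseLong(props.get(stat));
        }
      }
      totals.put(stat, statVal);
    }
    return totals;
  }

  public static void main(String[] args) {
    // Four hypothetical partitions (e.g. srcpart's ds/hr combinations),
    // each holding one file of 500 rows.
    Map<String, String> p = new HashMap<>();
    p.put("numFiles", "1");
    p.put("numRows", "500");
    p.put("rawDataSize", "5312");
    p.put("totalSize", "5812");

    // Totals come out as numFiles=4, numRows=2000, rawDataSize=21248,
    // totalSize=23248, matching the table-level stats in the q.out above.
    System.out.println(rollup(Arrays.asList(p, p, p, p)));
  }
}

Beyond this rollup, the actual patch also ANDs StatsSetupConst.areBasicStatsUptoDate(props) across partitions so DESCRIBE can report whether the aggregated numbers are current, and on the column-statistics path it calls Hive.getAggrColStatsFor, marking the column stats accurate only when aggregates were found for every partition (parts.size() == aggrStats.getPartsFound()).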