diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java index 0ffdbe0..ff14842 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -26,15 +26,18 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.FileMetadataHandler; import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.PartFilterExprUtil; import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; import org.apache.hadoop.hive.metastore.api.Database; @@ -69,6 +72,8 @@ import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.PlanResult; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.ScanPlan; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics; import org.apache.hadoop.hive.metastore.parser.ExpressionTree; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -1676,6 +1681,20 @@ public boolean updateTableColumnStatistics(ColumnStatistics colStats) throws try { getHBase().updateStatistics(colStats.getStatsDesc().getDbName(), colStats.getStatsDesc().getTableName(), null, colStats); + + //update table properties + List statsObjs = colStats.getStatsObj(); + List colNames = new ArrayList<>(); + for (ColumnStatisticsObj statsObj:statsObjs) { + colNames.add(statsObj.getColName()); + } + String dbName = colStats.getStatsDesc().getDbName(); + String tableName = colStats.getStatsDesc().getTableName(); + Table newTable = getTable(dbName, tableName); + Table newTableCopy = newTable.deepCopy(); + StatsSetupConst.setColumnStatsState(newTableCopy.getParameters(), colNames); + getHBase().replaceTable(newTable, newTableCopy); + commit = true; return true; } catch (IOException e) { @@ -1687,17 +1706,32 @@ public boolean updateTableColumnStatistics(ColumnStatistics colStats) throws } @Override - public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, + public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List partVals) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { boolean commit = false; openTransaction(); try { - getHBase().updateStatistics(statsObj.getStatsDesc().getDbName(), - statsObj.getStatsDesc().getTableName(), partVals, statsObj); + getHBase().updateStatistics(colStats.getStatsDesc().getDbName(), + colStats.getStatsDesc().getTableName(), partVals, colStats); // We need to invalidate aggregates that include this partition - getHBase().getStatsCache().invalidate(statsObj.getStatsDesc().getDbName(), - statsObj.getStatsDesc().getTableName(), statsObj.getStatsDesc().getPartName()); + getHBase().getStatsCache().invalidate(colStats.getStatsDesc().getDbName(), + colStats.getStatsDesc().getTableName(), colStats.getStatsDesc().getPartName()); + + // update partition properties + String db_name = colStats.getStatsDesc().getDbName(); + String tbl_name = colStats.getStatsDesc().getTableName(); + Partition oldPart = getHBase().getPartition(db_name, tbl_name, partVals); + Partition new_partCopy = oldPart.deepCopy(); + List colNames = new ArrayList<>(); + List statsObjs = colStats.getStatsObj(); + for (ColumnStatisticsObj statsObj : statsObjs) { + colNames.add(statsObj.getColName()); + } + StatsSetupConst.setColumnStatsState(new_partCopy.getParameters(), colNames); + getHBase().replacePartition(oldPart, new_partCopy, + HBaseUtils.getPartitionKeyTypes(getTable(db_name, tbl_name).getPartitionKeys())); + commit = true; return true; } catch (IOException e) { diff --git a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out index d4f8e34..e3956a1 100644 --- a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out +++ b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out @@ -46,11 +46,11 @@ Stage-0 Statistics:Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_1] outputColumnNames:["_col0"] - Statistics:Num rows: 9173 Data size: 100540 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 9173 Data size: 100600 Basic stats: COMPLETE Column stats: NONE TableScan [TS_0] ACID table:true alias:acidtest1 - Statistics:Num rows: 9173 Data size: 100540 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 9173 Data size: 100600 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: select cast (c1 as string) from acidtest1 limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/bucket2.q.out b/ql/src/test/results/clientpositive/tez/bucket2.q.out index ee2c6c3..1b74b2e 100644 --- a/ql/src/test/results/clientpositive/tez/bucket2.q.out +++ b/ql/src/test/results/clientpositive/tez/bucket2.q.out @@ -74,7 +74,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -94,7 +94,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/bucket3.q.out b/ql/src/test/results/clientpositive/tez/bucket3.q.out index b8da0df..7645faa 100644 --- a/ql/src/test/results/clientpositive/tez/bucket3.q.out +++ b/ql/src/test/results/clientpositive/tez/bucket3.q.out @@ -78,7 +78,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -98,7 +98,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/bucket4.q.out b/ql/src/test/results/clientpositive/tez/bucket4.q.out index e35bc26..1e4f0a6 100644 --- a/ql/src/test/results/clientpositive/tez/bucket4.q.out +++ b/ql/src/test/results/clientpositive/tez/bucket4.q.out @@ -71,7 +71,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -91,7 +91,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/ctas.q.out b/ql/src/test/results/clientpositive/tez/ctas.q.out index de1b899..66d5a6c 100644 --- a/ql/src/test/results/clientpositive/tez/ctas.q.out +++ b/ql/src/test/results/clientpositive/tez/ctas.q.out @@ -769,7 +769,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -789,7 +789,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out index 4d44eb5..b61d0c2 100644 --- a/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out @@ -70,7 +70,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -90,7 +90,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index e2db163..0be17a7 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -32,23 +32,23 @@ Stage-0 Reducer 2 vectorized File Output Operator [FS_8] compressed:false - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} Select Operator [OP_7] | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] vectorized Reduce Output Operator [RS_6] key expressions:_col0 (type: int), _col1 (type: string) sort order:++ - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE Select Operator [OP_5] outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE TableScan [TS_0] ACID table:true alias:acid_vectorized - Statistics:Num rows: 10 Data size: 1704 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 10 Data size: 1714 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain select key, value FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol diff --git a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out index 540b069..3534a9c 100644 --- a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out @@ -140,7 +140,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -186,7 +186,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -232,7 +232,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -278,7 +278,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -350,7 +350,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -370,7 +370,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -420,7 +420,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -440,7 +440,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out index 4f0dfbb..14fbf0e 100644 --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out @@ -453,52 +453,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint) - outputColumnNames: _col2, _col3, _col4, _col5 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -510,7 +470,7 @@ POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl #### A masked pattern was here #### -9999 9999 1999.8000000003176 9999 9999 9999 9999 9999 +9999 9999 1999.8000000000002 9999 9999 9999 9999 9999 PREHOOK: query: explain select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl PREHOOK: type: QUERY @@ -518,52 +478,12 @@ POSTHOOK: query: explain select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: i, b, f, d - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), _col6 (type: double), _col7 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), min(VALUE._col2), max(VALUE._col3), min(VALUE._col4), max(VALUE._col5), min(VALUE._col6), max(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -583,56 +503,12 @@ POSTHOOK: query: explain select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: i, b, f, d - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), _col6 (type: double), _col7 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), min(VALUE._col2), max(VALUE._col3), min(VALUE._col4), max(VALUE._col5), min(VALUE._col6), max(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), '1' (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), 7.0 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -652,70 +528,24 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint) - outputColumnNames: _col2, _col3, _col4, _col5 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2010 -PREHOOK: Input: default@stats_tbl_part@dt=2011 -PREHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2010 -POSTHOOK: Input: default@stats_tbl_part@dt=2011 -POSTHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### -9489 9489 1897.8000000002944 9489 9489 9489 9489 9489 +9489 9489 1897.8000000000002 9489 9489 9489 9489 9489 PREHOOK: query: explain select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part PREHOOK: type: QUERY @@ -723,68 +553,22 @@ POSTHOOK: query: explain select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: i, b, f, d - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), _col6 (type: double), _col7 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), min(VALUE._col2), max(VALUE._col3), min(VALUE._col4), max(VALUE._col5), min(VALUE._col6), max(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2010 -PREHOOK: Input: default@stats_tbl_part@dt=2011 -PREHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2010 -POSTHOOK: Input: default@stats_tbl_part@dt=2011 -POSTHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### 65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0 PREHOOK: query: explain @@ -794,72 +578,22 @@ POSTHOOK: query: explain select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: i, b, f, d - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), _col6 (type: double), _col7 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), min(VALUE._col2), max(VALUE._col3), min(VALUE._col4), max(VALUE._col5), min(VALUE._col6), max(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), '1' (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: float), _col5 (type: float), 7.0 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2010 -PREHOOK: Input: default@stats_tbl_part@dt=2011 -PREHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2010 -POSTHOOK: Input: default@stats_tbl_part@dt=2011 -POSTHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### 65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0 PREHOOK: query: explain select count(ts) from stats_tbl_part diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out index 037ac78..6dea3e0 100644 --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries_with_filters.q.out @@ -149,64 +149,22 @@ POSTHOOK: query: explain select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 2322 Data size: 238167 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2322 Data size: 238167 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(1), sum(1), count(_col1), count(_col2), count(_col3), count(_col4), max(_col5), min(_col6), max(_col7), min(_col8) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint), _col9 (type: float), _col10 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), max(VALUE._col9), min(VALUE._col10) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### 2322 2322 2322 2322 2322 2322 2322 65791 4294967296 99.98 0.03 PREHOOK: query: explain @@ -216,64 +174,22 @@ POSTHOOK: query: explain select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 2219 Data size: 229011 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 2219 Data size: 229011 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(1), sum(1), sum(2), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7), max(_col8), min(_col9) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), max(VALUE._col10), min(VALUE._col11) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part -PREHOOK: Input: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### POSTHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part -POSTHOOK: Input: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### 2219 2219 2219 4438 2219 2219 2219 2219 65791 4294967296 99.96 0.04 PREHOOK: query: select count(*) from stats_tbl_part diff --git a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out index e20d5b8..a092ff3 100644 --- a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out @@ -281,7 +281,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -301,7 +301,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -354,7 +354,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -399,7 +399,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -444,7 +444,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -489,7 +489,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -702,7 +702,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -722,7 +722,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -775,7 +775,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -821,7 +821,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -867,7 +867,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -913,7 +913,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1139,7 +1139,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1159,7 +1159,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1212,7 +1212,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1257,7 +1257,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1302,7 +1302,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1347,7 +1347,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1587,7 +1587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1607,7 +1607,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1652,7 +1652,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1672,7 +1672,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1839,7 +1839,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1859,7 +1859,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1905,7 +1905,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -1925,7 +1925,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -2066,7 +2066,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -2086,7 +2086,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/sample1.q.out b/ql/src/test/results/clientpositive/tez/sample1.q.out index b9af526..9663218 100644 --- a/ql/src/test/results/clientpositive/tez/sample1.q.out +++ b/ql/src/test/results/clientpositive/tez/sample1.q.out @@ -120,7 +120,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/stats_only_null.q.out b/ql/src/test/results/clientpositive/tez/stats_only_null.q.out index c40e6a3..c11099b 100644 --- a/ql/src/test/results/clientpositive/tez/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/tez/stats_only_null.q.out @@ -230,7 +230,7 @@ Database: default Table: stats_null_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"} numFiles 1 numRows 6 rawDataSize 71 @@ -271,7 +271,7 @@ Database: default Table: stats_null_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"},\"BASIC_STATS\":\"true\"} numFiles 1 numRows 4 rawDataSize 49 @@ -295,52 +295,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 1016 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 1016 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -351,52 +311,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -412,14 +332,10 @@ POSTHOOK: Input: default@stats_null PREHOOK: query: select count(*), count(a), count(b), count(c), count(d) from stats_null_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part -PREHOOK: Input: default@stats_null_part@dt=2010 -PREHOOK: Input: default@stats_null_part@dt=2011 #### A masked pattern was here #### POSTHOOK: query: select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part -POSTHOOK: Input: default@stats_null_part@dt=2010 -POSTHOOK: Input: default@stats_null_part@dt=2011 #### A masked pattern was here #### 10 8 8 10 10 PREHOOK: query: drop table stats_null_part diff --git a/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out b/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out index f6e42e3..35700e6 100644 --- a/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out +++ b/ql/src/test/results/clientpositive/tez/transform_ppr1.q.out @@ -153,7 +153,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -199,7 +199,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -245,7 +245,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -291,7 +291,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out b/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out index a103408..a65fea7 100644 --- a/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out +++ b/ql/src/test/results/clientpositive/tez/transform_ppr2.q.out @@ -155,7 +155,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default' @@ -201,7 +201,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} bucket_count -1 columns key,value columns.comments 'default','default'