diff --git ql/src/test/results/clientnegative/stats_partialscan_autogether.q.out ql/src/test/results/clientnegative/stats_partialscan_autogether.q.out
index 70a98b4..2bd309a 100644
--- ql/src/test/results/clientnegative/stats_partialscan_autogether.q.out
+++ ql/src/test/results/clientnegative/stats_partialscan_autogether.q.out
@@ -72,10 +72,10 @@ Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
   COLUMN_STATS_ACCURATE false
-  numFiles 1
+  numFiles 22
   numRows -1
   rawDataSize -1
-  totalSize 5293
+  totalSize 6954
 #### A masked pattern was here ####
 
 # Storage Information
diff --git ql/src/test/results/clientpositive/combine2.q.out ql/src/test/results/clientpositive/combine2.q.out
index 00a5c93..362e900 100644
--- ql/src/test/results/clientpositive/combine2.q.out
+++ ql/src/test/results/clientpositive/combine2.q.out
@@ -148,7 +148,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select key, value from combine2 where value is not null order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@combine2
@@ -221,16 +220,24 @@ STAGE PLANS:
         combine2 
           TableScan
             alias: combine2
+            Statistics:
+                numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE
             GatherStats: false
             Select Operator
+              Statistics:
+                  numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
+               Statistics:
+                   numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
                Reduce Output Operator
                  sort order: 
+                 Statistics:
+                     numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -246,6 +253,7 @@ STAGE PLANS:
           partition values:
             value 2010-04-21 09:45:00
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -270,15 +278,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -291,6 +294,7 @@ STAGE PLANS:
           partition values:
             value val_0
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -315,15 +319,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -336,6 +335,7 @@ STAGE PLANS:
           partition values:
             value val_2
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -360,15 +360,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -381,6 +376,7 @@ STAGE PLANS:
           partition values:
             value val_4
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -405,15 +401,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -426,6 +417,7 @@ STAGE PLANS:
           partition values:
             value val_5
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -450,15 +442,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -471,6 +458,7 @@ STAGE PLANS:
           partition values:
             value val_8
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -495,15 +483,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -516,6 +499,7 @@ STAGE PLANS:
           partition values:
             value val_9
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -540,15 +524,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -561,6 +540,7 @@ STAGE PLANS:
           partition values:
             value |
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key
             columns.types string
@@ -585,15 +565,10 @@ STAGE PLANS:
               columns.types string
 #### A masked pattern was here ####
               name default.combine2
-              numFiles 12
-              numPartitions 8
-              numRows 12
               partition_columns value
-              rawDataSize 14
               serialization.ddl struct combine2 { string key}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 26
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.combine2
@@ -615,16 +590,22 @@ STAGE PLANS:
            bucketGroup: false
            mode: mergepartial
            outputColumnNames: _col0
+           Statistics:
+               numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
            Select Operator
              expressions:
                    expr: _col0
                    type: bigint
              outputColumnNames: _col0
+             Statistics:
+                 numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
              File Output Operator
                compressed: false
                GlobalTableId: 0
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
+               Statistics:
+                   numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
@@ -645,7 +626,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select count(1) from combine2 where value is not null
 PREHOOK: type: QUERY
 PREHOOK: Input: default@combine2
@@ -762,7 +742,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out
index f046662..0566894 100644
--- ql/src/test/results/clientpositive/ctas.q.out
+++ ql/src/test/results/clientpositive/ctas.q.out
@@ -117,7 +117,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
-
 PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
@@ -160,8 +159,8 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
-  numPartitions 0
   numRows 10
   rawDataSize 96
   totalSize 106
 #### A masked pattern was here ####
@@ -279,7 +278,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
-
 PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
@@ -322,8 +320,8 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
-  numPartitions 0
   numRows 10
   rawDataSize 96
   totalSize 106
@@ -442,7 +440,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
-
 PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
@@ -485,8 +482,8 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
-  numPartitions 0
   numRows 10
   rawDataSize 120
   totalSize 199
@@ -512,7 +509,6 @@ ABSTRACT SYNTAX TREE:
 STAGE DEPENDENCIES:
 STAGE PLANS:
-STAGE PLANS:
 PREHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2
 PREHOOK: type: CREATETABLE
 POSTHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2
 POSTHOOK: type: CREATETABLE
@@ -552,8 +548,8 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
-  numPartitions 0
   numRows 10
   rawDataSize 120
   totalSize 199
@@ -672,7 +668,6 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
-
 PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
@@ -715,8 +710,8 @@ Retention: 0
 #### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
-  numPartitions 0
   numRows 10
   rawDataSize 96
   totalSize 106
@@ -754,6 +749,8 @@ STAGE PLANS:
         src 
          TableScan
            alias: src
+           Statistics:
+               numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -762,6 +759,8 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
              Reduce Output Operator
                key expressions:
                      expr: _col0
@@ -769,6 +768,8 @@ STAGE PLANS:
                      expr: _col1
                      type: string
                sort order: ++
+               Statistics:
+                   numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
                tag: -1
                value expressions:
                      expr: _col0
@@ -784,13 +785,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -803,13 +804,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -825,7 +826,11 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
         Limit
+          Statistics:
+              numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
@@ -857,6 +862,8 @@ STAGE PLANS:
                      expr: _col1
                      type: string
                sort order: ++
+               Statistics:
+                   numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE
                tag: -1
                value expressions:
                      expr: _col0
@@ -891,12 +898,18 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE
         Limit
+          Statistics:
+              numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 1
 #### A masked pattern was here ####
            NumFilesPerFileSink: 1
+           Statistics:
+               numRows: 10 dataSize: 2000 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
@@ -940,7 +953,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@src
@@ -953,11 +965,11 @@ PREHOOK: type: CREATETABLE
 POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: default@nzhang_ctas6
-PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10
+PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@nzhang_ctas6
-POSTHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10
+POSTHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src tablesample (10 rows)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@nzhang_ctas6
diff --git ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
index df3b764..0f5738a 100644
--- ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
+++ ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
@@ -59,6 +59,7 @@ Table: list_bucketing_table
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 2
   numRows 309
   rawDataSize 1482
@@ -137,10 +138,11 @@ Table: list_bucketing_table2
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 1
   numRows 309
   rawDataSize 1482
-  totalSize 1791
+  totalSize 136
 #### A masked pattern was here ####
 
 # Storage Information
diff --git ql/src/test/results/clientpositive/list_bucket_dml_1.q.out ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
index 6b5648a..ddfc09f 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_1.q.out
@@ -42,6 +42,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -52,12 +54,16 @@ STAGE PLANS:
                    expr: hr
                    type: string
              outputColumnNames: _col0, _col1, _col2
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
@@ -90,6 +96,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -114,15 +121,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -136,6 +138,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -160,15 +163,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -207,7 +205,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -254,6 +251,7 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 2
   numRows 500
   rawDataSize 5312
@@ -302,6 +300,7 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 2
   numRows 500
   rawDataSize 5312
@@ -396,12 +395,16 @@ STAGE PLANS:
         list_bucketing_dynamic_part 
          TableScan
            alias: list_bucketing_dynamic_part
+           Statistics:
+               numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (key = '484')
                  type: boolean
+             Statistics:
+                 numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -409,11 +412,15 @@ STAGE PLANS:
                      expr: value
                      type: string
                outputColumnNames: _col0, _col1
+               Statistics:
+                   numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
 #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                 Statistics:
+                     numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -441,6 +448,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -465,15 +473,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 4
-              numPartitions 2
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 10624
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 11624
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.list_bucketing_dynamic_part
@@ -485,7 +488,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select key, value from list_bucketing_dynamic_part where ds='2008-04-08' and hr='11' and key = "484"
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_dynamic_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
index 41a9215..37f5cc1 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
@@ -50,6 +50,8 @@ STAGE PLANS:
         src 
          TableScan
            alias: src
+           Statistics:
+               numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -58,12 +60,16 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -93,13 +99,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -112,13 +118,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -162,7 +168,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src
 PREHOOK: type: QUERY
@@ -209,6 +214,7 @@ Table: list_bucketing_static_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 4
   numRows 500
   rawDataSize 4812
@@ -252,12 +258,16 @@ STAGE PLANS:
         list_bucketing_static_part 
          TableScan
            alias: list_bucketing_static_part
+           Statistics:
+               numRows: 500 dataSize: 4812 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (value = 'val_466')
                  type: boolean
+             Statistics:
+                 numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -265,11 +275,15 @@ STAGE PLANS:
                      expr: value
                      type: string
                outputColumnNames: _col0, _col1
+               Statistics:
+                   numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
 #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                 Statistics:
+                     numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -297,6 +311,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -321,15 +336,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 4
-              numPartitions 1
-              numRows 500
               partition_columns ds/hr
-              rawDataSize 4812
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 5522
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -341,7 +351,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
index 562a684..d11e176 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
@@ -46,6 +46,8 @@ STAGE PLANS:
         src 
          TableScan
            alias: src
+           Statistics:
+               numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -60,12 +62,16 @@ STAGE PLANS:
                    expr: UDFToString(1)
                    type: string
              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+             Statistics:
+                 numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -95,13 +101,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -114,13 +120,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -164,7 +170,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src
 PREHOOK: type: QUERY
@@ -223,6 +228,7 @@ Table: list_bucketing_mul_col
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 4
   numRows 500
   rawDataSize 6312
@@ -271,12 +277,16 @@ STAGE PLANS:
         list_bucketing_mul_col 
          TableScan
            alias: list_bucketing_mul_col
+           Statistics:
+               numRows: 500 dataSize: 6312 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((col2 = '466') and (col4 = 'val_466'))
                  type: boolean
+             Statistics:
+                 numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: col1
@@ -294,6 +304,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Statistics:
+                   numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col1
@@ -305,6 +317,8 @@ STAGE PLANS:
                        expr: _col6
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -333,6 +347,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns col1,col2,col3,col4,col5
             columns.types string:string:string:string:string
@@ -357,15 +372,10 @@ STAGE PLANS:
               columns.types string:string:string:string:string
 #### A masked pattern was here ####
               name default.list_bucketing_mul_col
-              numFiles 4
-              numPartitions 1
-              numRows 500
               partition_columns ds/hr
-              rawDataSize 6312
               serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 7094
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_mul_col
@@ -375,11 +385,15 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -400,7 +414,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr
 PREHOOK: type: QUERY
@@ -448,12 +461,16 @@ STAGE PLANS:
         list_bucketing_mul_col 
          TableScan
            alias: list_bucketing_mul_col
+           Statistics:
+               numRows: 500 dataSize: 6312 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((col2 = '382') and (col4 = 'val_382'))
                  type: boolean
+             Statistics:
+                 numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: col1
@@ -471,6 +488,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Statistics:
+                   numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col1
@@ -482,6 +501,8 @@ STAGE PLANS:
                        expr: _col6
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -510,6 +531,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns col1,col2,col3,col4,col5
             columns.types string:string:string:string:string
@@ -534,15 +556,10 @@ STAGE PLANS:
               columns.types string:string:string:string:string
 #### A masked pattern was here ####
               name default.list_bucketing_mul_col
-              numFiles 4
-              numPartitions 1
-              numRows 500
               partition_columns ds/hr
-              rawDataSize 6312
               serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 7094
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_mul_col
@@ -552,11 +569,15 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -577,7 +598,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" ORDER BY col2, col4, ds, hr
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
index c93c87d..10694b1 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.out
@@ -46,6 +46,8 @@ STAGE PLANS:
         src 
          TableScan
            alias: src
+           Statistics:
+               numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -60,12 +62,16 @@ STAGE PLANS:
                    expr: UDFToString(1)
                    type: string
              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+             Statistics:
+                 numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/
+               Statistics:
+                   numRows: 29 dataSize: 5812 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -95,13 +101,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -114,13 +120,13 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
 #### A masked pattern was here ####
               name default.src
               numFiles 1
-              numPartitions 0
               numRows 0
               rawDataSize 0
               serialization.ddl struct src { string key, string value}
@@ -164,7 +170,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src
 PREHOOK: type: QUERY
@@ -223,6 +228,7 @@ Table: list_bucketing_mul_col
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 4
   numRows 500
   rawDataSize 6312
@@ -271,12 +277,16 @@ STAGE PLANS:
         list_bucketing_mul_col 
          TableScan
            alias: list_bucketing_mul_col
+           Statistics:
+               numRows: 500 dataSize: 6312 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((col2 = '466') and (col4 = 'val_466'))
                  type: boolean
+             Statistics:
+                 numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: col1
@@ -294,6 +304,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Statistics:
+                   numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col1
@@ -305,6 +317,8 @@ STAGE PLANS:
                        expr: _col6
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -333,6 +347,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 2013-01-23+18:00:99
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns col1,col2,col3,col4,col5
             columns.types string:string:string:string:string
@@ -357,15 +372,10 @@ STAGE PLANS:
               columns.types string:string:string:string:string
 #### A masked pattern was here ####
               name default.list_bucketing_mul_col
-              numFiles 4
-              numPartitions 1
-              numRows 500
               partition_columns ds/hr
-              rawDataSize 6312
               serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 7094
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_mul_col
@@ -375,11 +385,15 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 125 dataSize: 1578 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -400,7 +414,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" ORDER BY col2, col4, ds, hr
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
index a97f9bc..41aad2b 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.out
@@ -64,6 +64,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -72,12 +74,16 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -110,6 +116,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -134,15 +141,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -156,6 +158,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -180,15 +183,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -227,7 +225,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
@@ -278,6 +275,7 @@ Table: list_bucketing_static_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 6
   numRows 1000
   rawDataSize 9624
@@ -349,12 +347,16 @@ STAGE PLANS:
         list_bucketing_static_part 
          TableScan
            alias: list_bucketing_static_part
+           Statistics:
+               numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((key = '484') and (value = 'val_484'))
                  type: boolean
+             Statistics:
+                 numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -366,6 +368,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics:
+                   numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col0
@@ -377,6 +381,8 @@ STAGE PLANS:
                        expr: _col3
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -399,6 +405,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -423,15 +430,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -441,11 +443,15 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -466,7 +472,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_3.q.out ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
index ac29e02..de5ed8e 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_3.q.out
@@ -36,6 +36,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -44,12 +46,16 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
@@ -82,6 +88,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -106,15 +113,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -128,6 +130,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -152,15 +155,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -199,7 +197,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -241,6 +238,7 @@ Table: list_bucketing_static_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 4
   numRows 1000
   rawDataSize 10624
@@ -325,12 +323,16 @@ STAGE PLANS:
         list_bucketing_static_part 
          TableScan
            alias: list_bucketing_static_part
+           Statistics:
+               numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (key = '484')
                  type: boolean
+             Statistics:
+                 numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -338,11 +340,15 @@ STAGE PLANS:
                      expr: value
                      type: string
                outputColumnNames: _col0, _col1
+               Statistics:
+                   numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
 #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                 Statistics:
+                     numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -370,6 +376,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -394,15 +401,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 4
-              numPartitions 1
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 10624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 11624
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.list_bucketing_static_part
@@ -414,7 +416,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and key = "484"
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
index 44953ef..87804e0 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
@@ -72,6 +72,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -80,12 +82,16 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -118,6 +124,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -142,15 +149,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -164,6 +166,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -188,15 +191,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -235,7 +233,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
@@ -286,6 +283,7 @@ Table: list_bucketing_static_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 6
   numRows 1000
   rawDataSize 9624
@@ -339,6 +337,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -347,12 +347,16 @@ STAGE PLANS:
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/hr=11/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -363,14 +367,9 @@ STAGE PLANS:
                    columns.types string:string
 #### A masked pattern was here ####
                    name default.list_bucketing_static_part
-                   numFiles 6
-                   numPartitions 1
-                   numRows 1000
-                   rawDataSize 9624
                    serialization.ddl struct list_bucketing_static_part { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                   totalSize 10898
 #### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                  name: default.list_bucketing_static_part
@@ -389,6 +388,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -413,15 +413,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -435,6 +430,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -459,15 +455,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -502,14 +493,9 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -534,14 +520,9 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
@@ -553,14 +534,9 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -583,14 +559,9 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
@@ -602,14 +573,9 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 6
-              numPartitions 1
-              numRows 1000
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10898
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -623,7 +589,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
@@ -680,6 +645,7 @@ Table: list_bucketing_static_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 4
   numRows 1000
   rawDataSize 9624
@@ -757,12 +723,16 @@ STAGE PLANS:
         list_bucketing_static_part 
          TableScan
            alias: list_bucketing_static_part
+           Statistics:
+               numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((key = '484') and (value = 'val_484'))
                  type: boolean
+             Statistics:
+                 numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -774,6 +744,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics:
+                   numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col0
@@ -785,6 +757,8 @@ STAGE PLANS:
                        expr: _col3
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -807,6 +781,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -831,15 +806,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_static_part
-              numFiles 4
-              numPartitions 1
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 9624
               serialization.ddl struct list_bucketing_static_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 10786
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
             name: default.list_bucketing_static_part
@@ -849,11 +819,15 @@ STAGE PLANS:
       Needs Tagging: false
      Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -874,7 +848,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
index 5a4adcd..0294842 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
@@ -42,6 +42,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -52,12 +54,16 @@ STAGE PLANS:
                    expr: hr
                    type: string
              outputColumnNames: _col0, _col1, _col2
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
@@ -90,6 +96,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -114,15 +121,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -136,6 +138,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -160,15 +163,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -207,7 +205,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -254,6 +251,7 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 3
   numRows 500
   rawDataSize 5312
@@ -302,6 +300,7 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 3
   numRows 500
   rawDataSize 5312
@@ -401,12 +400,16 @@ STAGE PLANS:
         list_bucketing_dynamic_part 
          TableScan
            alias: list_bucketing_dynamic_part
+           Statistics:
+               numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((key = '103') and (value = 'val_103'))
                  type: boolean
+             Statistics:
+                 numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -418,6 +421,8 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics:
+                   numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col0
@@ -429,6 +434,8 @@ STAGE PLANS:
                        expr: _col3
                        type: string
                  sort order: ++++
+                 Statistics:
+                     numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -451,6 +458,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -475,15 +483,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 6
-              numPartitions 2
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 10624
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 11624
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.list_bucketing_dynamic_part
@@ -497,6 +500,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
            hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -521,15 +525,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 6
-              numPartitions 2
-              numRows 1000
               partition_columns ds/hr
-              rawDataSize 10624
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 11624
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.list_bucketing_dynamic_part
@@ -540,11 +539,15 @@ STAGE PLANS:
       Needs Tagging: false
      Reduce Operator Tree:
        Extract
+         Statistics:
+             numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
         File Output Operator
           compressed: false
           GlobalTableId: 0
 #### A masked pattern was here ####
           NumFilesPerFileSink: 1
+          Statistics:
+              numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -565,7 +568,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
-
 PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" ORDER BY key, value, ds, hr
 PREHOOK: type: QUERY
 PREHOOK: Input: default@list_bucketing_dynamic_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index b5da198..af2da28 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -118,6 +118,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -128,12 +130,16 @@ STAGE PLANS:
                    expr: if(((key % 100) = 0), 'a1', 'b1')
                    type: string
              outputColumnNames: _col0, _col1, _col2
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -166,6 +172,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 11
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -190,15 +197,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -212,6 +214,7 @@ STAGE PLANS:
           partition values:
             ds 2008-04-08
             hr 12
           properties:
+            COLUMN_STATS_ACCURATE true
             bucket_count -1
             columns key,value
             columns.types string:string
@@ -236,15 +239,10 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.srcpart
-              numFiles 4
-              numPartitions 4
-              numRows 0
               partition_columns ds/hr
-              rawDataSize 0
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              totalSize 23248
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.srcpart
@@ -283,7 +281,6 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
-
 PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
@@ -342,10 +339,11 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
-  numFiles 2
+  COLUMN_STATS_ACCURATE true
+  numFiles 1
   numRows 16
   rawDataSize 136
-  totalSize 310
+  totalSize 102
 #### A masked pattern was here ####
 
 # Storage Information
@@ -388,6 +386,7 @@ Table: list_bucketing_dynamic_part
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
+  COLUMN_STATS_ACCURATE true
   numFiles 6
   numRows 984
   rawDataSize 9488
@@ -443,6 +442,8 @@ STAGE PLANS:
         srcpart 
          TableScan
            alias: srcpart
+           Statistics:
+               numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Select Operator
              expressions:
@@ -453,12 +454,16 @@ STAGE PLANS:
                    expr: if(((key % 100) = 0), 'a1', 'b1')
                    type: string
              outputColumnNames: _col0, _col1, _col2
+             Statistics:
+                 numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 1
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
                Static Partition Specification: ds=2008-04-08/
+               Statistics:
+                   numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -469,15 +474,10 @@ STAGE PLANS:
                    columns.types string:string
 #### A masked pattern was here ####
                    name default.list_bucketing_dynamic_part
-                   numFiles 8
-                   numPartitions 2
-                   numRows 1000
                    partition_columns hr
-                   rawDataSize 9624
                    serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                   totalSize 11044
 #### A masked pattern was here ####
                  serde:
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -496,6 +496,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -520,15 +521,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -542,6 +538,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -566,15 +563,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -609,15 +601,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 8 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 11044 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -642,15 +629,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 8 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 11044 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -662,15 +644,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 8 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 11044 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -693,15 +670,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 8 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 11044 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -713,15 +685,10 @@ 
STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 8 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 11044 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -735,7 +702,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -806,10 +772,11 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 1 numRows 16 rawDataSize 136 - totalSize 254 + totalSize 102 #### A masked pattern was here #### # Storage Information @@ -856,6 +823,7 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 4 numRows 984 rawDataSize 9488 @@ -947,12 +915,16 @@ STAGE PLANS: list_bucketing_dynamic_part TableScan alias: list_bucketing_dynamic_part + Statistics: + numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '484') and (value = 'val_484')) type: boolean + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -964,11 +936,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -996,6 +972,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -1008,7 +985,7 @@ STAGE PLANS: serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 254 + totalSize 102 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -1020,15 +997,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 5 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10876 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -1042,6 +1014,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -1066,15 +1039,10 @@ STAGE PLANS: columns.types 
string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 5 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10876 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -1087,7 +1055,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' PREHOOK: type: QUERY PREHOOK: Input: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 0f75490..0696463 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -66,6 +66,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -76,12 +78,16 @@ STAGE PLANS: expr: if(((key % 100) = 0), 'a1', 'b1') type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/ + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -114,6 +120,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -138,15 +145,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -160,6 +162,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -184,15 +187,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -231,7 +229,6 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -290,10 +287,11 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: - numFiles 2 + 
COLUMN_STATS_ACCURATE true + numFiles 1 numRows 16 rawDataSize 136 - totalSize 310 + totalSize 204 #### A masked pattern was here #### # Storage Information @@ -336,6 +334,7 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 4 numRows 984 rawDataSize 9488 @@ -391,6 +390,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -401,12 +402,16 @@ STAGE PLANS: expr: if(((key % 100) = 0), 'a1', 'b1') type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/ + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -417,15 +422,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -444,6 +444,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -468,15 +469,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -490,6 +486,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -514,15 +511,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -557,15 +549,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -590,15 
+577,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -610,15 +592,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -641,15 +618,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -661,15 +633,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 6 - numPartitions 2 - numRows 1000 partition_columns hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10886 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -683,7 +650,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -754,10 +720,11 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 1 numRows 16 rawDataSize 136 - totalSize 254 + totalSize 136 #### A masked pattern was here #### # Storage Information @@ -804,6 +771,7 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 3 numRows 984 rawDataSize 9488 @@ -895,12 +863,16 @@ STAGE PLANS: list_bucketing_dynamic_part TableScan alias: list_bucketing_dynamic_part + Statistics: + numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '484') and (value = 'val_484')) type: boolean + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -912,11 +884,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE 
colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -944,6 +920,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -956,7 +933,7 @@ STAGE PLANS: serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 254 + totalSize 136 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -968,15 +945,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 4 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10774 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -990,6 +962,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -1014,15 +987,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 4 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10774 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -1035,7 +1003,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' PREHOOK: type: QUERY PREHOOK: Input: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index 1daa114..7080f23 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -122,6 +122,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -132,12 +134,16 @@ STAGE PLANS: expr: if(((key % 100) = 0), 'a1', 'b1') type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/ + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -170,6 +176,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -194,15 
+201,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -216,6 +218,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -240,15 +243,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -287,7 +285,6 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -346,10 +343,11 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: - numFiles 2 + COLUMN_STATS_ACCURATE true + numFiles 1 numRows 16 rawDataSize 136 - totalSize 310 + totalSize 102 #### A masked pattern was here #### # Storage Information @@ -392,6 +390,7 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 6 numRows 984 rawDataSize 9488 @@ -454,6 +453,7 @@ Table: list_bucketing_dynamic_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 3 numRows 984 rawDataSize 9488 @@ -533,12 +533,16 @@ STAGE PLANS: list_bucketing_dynamic_part TableScan alias: list_bucketing_dynamic_part + Statistics: + numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '484') and (value = 'val_484')) type: boolean + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -550,11 +554,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -582,19 +590,20 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 2 + numFiles 1 numRows 16 partition_columns ds/hr rawDataSize 136 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 310 + totalSize 102 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -606,15 +615,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 5 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10896 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -628,6 +632,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -652,15 +657,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_dynamic_part - numFiles 5 - numPartitions 2 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_dynamic_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10896 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part @@ -673,7 +673,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' PREHOOK: type: QUERY PREHOOK: Input: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 37174a2..b6118ea 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -72,6 +72,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -80,12 +82,16 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -118,6 +124,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -142,15 +149,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -164,6 +166,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -188,15 +191,10 @@ STAGE PLANS: 
columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -235,7 +233,6 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -286,6 +283,7 @@ Table: list_bucketing_static_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 6 numRows 1000 rawDataSize 9624 @@ -339,6 +337,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -347,12 +347,16 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: + numRows: 58 dataSize: 11624 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -363,14 +367,9 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part @@ -389,6 +388,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -413,15 +413,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -435,6 +430,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -459,15 +455,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 partition_columns ds/hr - rawDataSize 0 serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart @@ -502,14 +493,9 @@ STAGE PLANS: columns.types 
string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part @@ -534,14 +520,9 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -553,14 +534,9 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part @@ -583,14 +559,9 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe @@ -602,14 +573,9 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 6 - numPartitions 1 - numRows 1000 - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part @@ -623,7 +589,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -680,6 +645,7 @@ Table: list_bucketing_static_part Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 4 numRows 1000 rawDataSize 9624 @@ -757,12 +723,16 @@ STAGE PLANS: list_bucketing_static_part TableScan alias: list_bucketing_static_part + Statistics: + numRows: 1000 dataSize: 9624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '484') and (value = 'val_484')) type: boolean + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -774,6 +744,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -785,6 +757,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++++ + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -807,6 +781,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -831,15 +806,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part - numFiles 4 - numPartitions 1 - numRows 1000 partition_columns ds/hr - rawDataSize 9624 serialization.ddl struct list_bucketing_static_part { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10786 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part @@ -849,11 +819,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2406 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -874,7 +848,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' ORDER BY key, value, ds, hr PREHOOK: type: QUERY PREHOOK: Input: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index 7839b7d..24f9e99 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -74,6 +74,7 @@ Table: fact_daily Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 3 numRows 500 rawDataSize 5312 @@ -132,22 +133,30 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '484') and (value = 'val_484')) type: boolean + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -175,6 +184,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -199,15 +209,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - 
rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -219,7 +224,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: -- List Bucketing Query SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') PREHOOK: type: QUERY @@ -259,12 +263,16 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key = '238') and (value = 'val_238')) type: boolean + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -272,11 +280,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -304,6 +316,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -328,15 +341,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -348,7 +356,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: -- List Bucketing Query SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') PREHOOK: type: QUERY @@ -389,22 +396,30 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = '3') type: boolean + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -432,6 +447,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -456,15 +472,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -476,7 +487,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: -- List Bucketing Query SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") PREHOOK: type: QUERY @@ -515,12 +525,16 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = '495') type: boolean + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -528,11 +542,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -560,6 +578,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -584,15 +603,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -604,7 +618,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: -- List Bucketing Query SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 8d81cae..ff210fa 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -72,6 +72,7 @@ Table: fact_daily Protect Mode: None #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE true numFiles 3 numRows 500 rawDataSize 5312 @@ -130,12 +131,16 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 'val_484') type: boolean + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -143,6 +148,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -150,6 +157,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -168,6 +177,7 @@ STAGE PLANS: ds 1 hr 4 properties: + 
COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -192,15 +202,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -214,6 +219,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -238,15 +244,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -257,11 +258,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -282,7 +287,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: -- List Bucketing Query SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and value= 'val_484' ORDER BY key, value PREHOOK: type: QUERY @@ -322,22 +326,30 @@ STAGE PLANS: fact_daily TableScan alias: fact_daily + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = '406') type: boolean + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 type: string sort order: + + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -354,6 +366,7 @@ STAGE PLANS: ds 1 hr 4 properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.types string:string @@ -378,15 +391,10 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.fact_daily - numFiles 3 - numPartitions 1 - numRows 500 partition_columns ds/hr - rawDataSize 5312 serialization.ddl struct fact_daily { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily @@ -396,11 +404,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 
#### A masked pattern was here ####
             NumFilesPerFileSink: 1
+            Statistics:
+                numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -421,7 +433,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and key= '406' ORDER BY key
 PREHOOK: type: QUERY
@@ -464,12 +475,16 @@ STAGE PLANS:
         fact_daily 
           TableScan
             alias: fact_daily
+            Statistics:
+                numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate:
                   expr: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))
                   type: boolean
+              Statistics:
+                  numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
               Select Operator
                 expressions:
                       expr: key
@@ -477,6 +492,8 @@ STAGE PLANS:
                       expr: value
                       type: string
                 outputColumnNames: _col0, _col1
+                Statistics:
+                    numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                 Reduce Output Operator
                   key expressions:
                         expr: _col0
@@ -484,6 +501,8 @@ STAGE PLANS:
                         expr: _col1
                         type: string
                   sort order: ++
+                  Statistics:
+                      numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                   tag: -1
                   value expressions:
                         expr: _col0
@@ -502,6 +521,7 @@ STAGE PLANS:
               ds 1
               hr 4
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -526,15 +546,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 3
-                numPartitions 1
-                numRows 500
                 partition_columns ds/hr
-                rawDataSize 5312
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 5812
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -548,6 +563,7 @@ STAGE PLANS:
               ds 1
               hr 4
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -572,15 +588,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 3
-                numPartitions 1
-                numRows 500
                 partition_columns ds/hr
-                rawDataSize 5312
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 5812
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -591,11 +602,15 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
         Extract
+          Statistics:
+              numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
#### A masked pattern was here ####
             NumFilesPerFileSink: 1
+            Statistics:
+                numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -616,7 +631,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238')) ORDER BY key, value
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
index 188085a..6565e53 100644
--- ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
@@ -74,6 +74,7 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
 	numFiles	1
 	numRows	500
 	rawDataSize	5312
@@ -143,6 +144,7 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
 	numFiles	3
 	numRows	500
 	rawDataSize	5312
@@ -223,6 +225,7 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
 	numFiles	2
 	numRows	500
 	rawDataSize	5312
@@ -272,12 +275,16 @@ STAGE PLANS:
         fact_daily 
           TableScan
             alias: fact_daily
+            Statistics:
+                numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate:
                   expr: (key = '145')
                   type: boolean
+              Statistics:
+                  numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
               Select Operator
                 expressions:
                       expr: key
@@ -289,11 +296,15 @@ STAGE PLANS:
                       expr: hr
                       type: string
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics:
+                    numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
#### A masked pattern was here ####
                   NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 250 dataSize: 2656 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
@@ -321,6 +332,7 @@ STAGE PLANS:
               ds 1
               hr 1
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -345,15 +357,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 6
-                numPartitions 3
-                numRows 1500
                 partition_columns ds/hr
-                rawDataSize 15936
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 17436
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -365,7 +372,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: select * from fact_daily where ds = '1' and hr='1' and key='145'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@fact_daily
@@ -409,16 +415,24 @@ STAGE PLANS:
         fact_daily 
           TableScan
             alias: fact_daily
+            Statistics:
+                numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: COMPLETE
             GatherStats: false
             Select Operator
+              Statistics:
+                  numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: COMPLETE
               Group By Operator
                 aggregations:
                       expr: count()
                 bucketGroup: false
                 mode: hash
                 outputColumnNames: _col0
+                Statistics:
+                    numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
                 Reduce Output Operator
                   sort order:
+                  Statistics:
+                      numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
                   tag: -1
                   value expressions:
                         expr: _col0
@@ -435,6 +449,7 @@ STAGE PLANS:
               ds 1
               hr 1
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -459,15 +474,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 6
-                numPartitions 3
-                numRows 1500
                 partition_columns ds/hr
-                rawDataSize 15936
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 17436
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -482,16 +492,22 @@ STAGE PLANS:
           bucketGroup: false
           mode: mergepartial
           outputColumnNames: _col0
+          Statistics:
+              numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
           Select Operator
             expressions:
                   expr: _col0
                   type: bigint
             outputColumnNames: _col0
+            Statistics:
+                numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
             File Output Operator
               compressed: false
               GlobalTableId: 0
#### A masked pattern was here ####
               NumFilesPerFileSink: 1
+              Statistics:
+                  numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE
#### A masked pattern was here ####
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
@@ -512,7 +528,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: select count(*) from fact_daily where ds = '1' and hr='1'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@fact_daily
@@ -558,12 +573,16 @@ STAGE PLANS:
         fact_daily 
           TableScan
             alias: fact_daily
+            Statistics:
+                numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate:
                   expr: ((key = '484') and (value = 'val_484'))
                   type: boolean
+              Statistics:
+                  numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
               Select Operator
                 expressions:
                       expr: key
@@ -575,11 +594,15 @@ STAGE PLANS:
                       expr: hr
                       type: string
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics:
+                    numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
#### A masked pattern was here ####
                   NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
@@ -607,6 +630,7 @@ STAGE PLANS:
               ds 1
               hr 2
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -631,15 +655,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 6
-                numPartitions 3
-                numRows 1500
                 partition_columns ds/hr
-                rawDataSize 15936
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 17436
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -651,7 +670,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484')
 PREHOOK: type: QUERY
 PREHOOK: Input: default@fact_daily
@@ -697,12 +715,16 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: ((key = '327') and (value = 'val_327'))
                  type: boolean
+              Statistics:
+                  numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: key
@@ -714,11 +736,15 @@ STAGE PLANS:
                      expr: hr
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics:
+                    numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 125 dataSize: 1328 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -746,6 +772,7 @@ STAGE PLANS:
               ds 1
               hr 3
             properties:
+              COLUMN_STATS_ACCURATE true
               bucket_count -1
               columns key,value
               columns.types string:string
@@ -770,15 +797,10 @@ STAGE PLANS:
                 columns.types string:string
#### A masked pattern was here ####
                 name default.fact_daily
-                numFiles 6
-                numPartitions 3
-                numRows 1500
                 partition_columns ds/hr
-                rawDataSize 15936
                 serialization.ddl struct fact_daily { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 17436
#### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.fact_daily
@@ -790,7 +812,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327')
 PREHOOK: type: QUERY
 PREHOOK: Input: default@fact_daily
diff --git ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
index d06a41a..b88fc8f 100644
--- ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out
@@ -135,6 +135,12 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	false
+#### A masked pattern was here ####
+	numFiles	2
+	numRows	-1
+	rawDataSize	-1
+	totalSize	8
#### A masked pattern was here ####
 
 # Storage Information
@@ -188,22 +194,30 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 484)
                  type: boolean
+              Statistics:
+                  numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: _col0
+                Statistics:
+                    numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -230,15 +244,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x
              columns.types int
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 8
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -305,22 +324,30 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 495)
                  type: boolean
+              Statistics:
+                  numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: _col0
+                Statistics:
+                    numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -347,15 +374,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x
              columns.types int
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 8
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -418,22 +450,30 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 1)
                  type: boolean
+              Statistics:
+                  numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: _col0
+                Statistics:
+                    numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -460,15 +500,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x
              columns.types int
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 8
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
diff --git ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
index e14f49b..9e5b01d 100644
--- ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out
@@ -151,6 +151,12 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	false
+#### A masked pattern was here ####
+	numFiles	2
+	numRows	-1
+	rawDataSize	-1
+	totalSize	24
#### A masked pattern was here ####
 
 # Storage Information
@@ -208,22 +214,30 @@ STAGE PLANS:
         subq:fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 484)
                  type: boolean
+              Statistics:
+                  numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: _col0
+                Statistics:
+                    numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
+                  Statistics:
+                      numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
@@ -250,15 +264,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y
              columns.types int:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 24
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -329,12 +348,16 @@ STAGE PLANS:
         subq:fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 0 dataSize: 24 basicStatsState: PARTIAL colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 484)
                  type: boolean
+              Statistics:
+                  numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
@@ -342,6 +365,8 @@ STAGE PLANS:
                      expr: y
                      type: string
                outputColumnNames: _col0, _col1
+                Statistics:
+                    numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col0
@@ -349,6 +374,8 @@ STAGE PLANS:
                        expr: _col1
                        type: string
                  sort order: ++
+                  Statistics:
+                      numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -366,15 +393,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y
              columns.types int:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 24
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -400,11 +432,15 @@ STAGE PLANS:
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
+          Statistics:
+              numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
+            Statistics:
+                numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
@@ -425,7 +461,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 ORDER BY x1, y1
 PREHOOK: type: QUERY
@@ -469,17 +504,23 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 0 dataSize: 24 basicStatsState: PARTIAL colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 484)
                  type: boolean
+              Statistics:
+                  numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
              Select Operator
                expressions:
                      expr: y
                      type: string
                outputColumnNames: y
+                Statistics:
+                    numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
                Group By Operator
                  aggregations:
                        expr: count(1)
@@ -489,6 +530,8 @@ STAGE PLANS:
                        type: string
                  mode: hash
                  outputColumnNames: _col0, _col1
+                  Statistics:
+                      numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
@@ -497,6 +540,8 @@ STAGE PLANS:
                    Map-reduce partition columns:
                          expr: _col0
                          type: string
+                    Statistics:
+                        numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
                    tag: -1
                    value expressions:
                          expr: _col1
@@ -512,15 +557,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y
              columns.types int:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 24
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -554,6 +604,8 @@ STAGE PLANS:
                type: string
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics:
+              numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
          Select Operator
            expressions:
                  expr: _col0
@@ -561,11 +613,15 @@ STAGE PLANS:
                  expr: _col1
                  type: bigint
            outputColumnNames: _col0, _col1
+            Statistics:
+                numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
+              Statistics:
+                  numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
@@ -586,7 +642,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 select y, count(1) from fact_daily where ds ='1' and x = 484 group by y
 PREHOOK: type: QUERY
@@ -630,17 +685,23 @@ STAGE PLANS:
         subq:fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (x = 484)
                  type: boolean
+              Statistics:
+                  numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: x
+                Statistics:
+                    numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
                Group By Operator
                  aggregations:
                        expr: count(1)
@@ -650,6 +711,8 @@ STAGE PLANS:
                        type: int
                  mode: hash
                  outputColumnNames: _col0, _col1
+                  Statistics:
+                      numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
@@ -658,6 +721,8 @@ STAGE PLANS:
                    Map-reduce partition columns:
                          expr: _col0
                          type: int
+                    Statistics:
+                        numRows: 3 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE
                    tag: -1
                    value expressions:
                          expr: _col1
@@ -673,15 +738,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y
              columns.types int:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 2
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 24
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -715,6 +785,8 @@ STAGE PLANS:
                type: int
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics:
+              numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
          Select Operator
            expressions:
                  expr: _col0
@@ -722,11 +794,15 @@ STAGE PLANS:
                  expr: _col1
                  type: bigint
            outputColumnNames: _col0, _col1
+            Statistics:
+                numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
+              Statistics:
+                  numRows: 1 dataSize: 4 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
@@ -747,7 +823,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 select x, c from (select x, count(1) as c from fact_daily where ds = '1' group by x) subq where x = 484
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
index f61708f..9dfdb28 100644
--- ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out
@@ -201,6 +201,12 @@ Table: fact_daily
 Protect Mode:       	None
#### A masked pattern was here ####
 Partition Parameters:
+	COLUMN_STATS_ACCURATE	false
+#### A masked pattern was here ####
+	numFiles	3
+	numRows	-1
+	rawDataSize	-1
+	totalSize	117
#### A masked pattern was here ####
 
 # Storage Information
@@ -272,22 +278,30 @@ STAGE PLANS:
         fact_daily 
          TableScan
            alias: fact_daily
+            Statistics:
+                numRows: 29 dataSize: 117 basicStatsState: COMPLETE colStatsState: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: false
              predicate:
                  expr: (not (x = 86))
                  type: boolean
+              Statistics:
+                  numRows: 15 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE
              Select Operator
                expressions:
                      expr: x
                      type: int
                outputColumnNames: _col0
+                Statistics:
+                    numRows: 15 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: int
                  sort order: +
+                  Statistics:
+                      numRows: 15 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE
                  tag: -1
                  value expressions:
                        expr: _col0
@@ -303,15 +317,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y,z
              columns.types int:string:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 3
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y, string z}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 117
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -340,15 +359,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y,z
              columns.types int:string:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 3
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y, string z}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 117
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -377,15 +401,20 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
+              COLUMN_STATS_ACCURATE false
              bucket_count -1
              columns x,y,z
              columns.types int:string:string
#### A masked pattern was here ####
              name default.fact_daily
+              numFiles 3
+              numRows -1
              partition_columns ds
+              rawDataSize -1
              serialization.ddl struct fact_daily { i32 x, string y, string z}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 117
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -413,11 +442,15 @@ STAGE PLANS:
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
+          Statistics:
+              numRows: 15 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
+            Statistics:
+                numRows: 15 dataSize: 60 basicStatsState: COMPLETE colStatsState: NONE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
@@ -438,7 +471,6 @@ STAGE PLANS:
     Fetch Operator
       limit: -1
 
-
 PREHOOK: query: -- List Bucketing Query
 SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) ORDER BY x
 PREHOOK: type: QUERY