diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 0b45e25..57a1da4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -90,12 +90,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, QBParseInfo parseInfo = parseCtx.getQB().getParseInfo(); if (parseInfo.isAnalyzeCommand()) { - boolean partialScan = parseInfo.isPartialScanAnalyzeCommand(); - boolean noScan = parseInfo.isNoScanAnalyzeCommand(); - if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) { - + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, all the following statements are the same + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // There will not be any MR or Tez job above this task StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec()); snjWork.setStatsReliable(parseCtx.getConf().getBoolVar( diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index 2ecf8a5..bb27440 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -95,12 +95,12 @@ public Object process(Node nd, Stack stack, assert alias != null; TezWork tezWork = context.currentTask.getWork(); - boolean partialScan = parseInfo.isPartialScanAnalyzeCommand(); - boolean noScan = parseInfo.isNoScanAnalyzeCommand(); - if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) { - + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, all the following statements are the same + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // There will not be any Tez job above this task StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( diff --git ql/src/test/queries/clientpositive/orc_analyze.q ql/src/test/queries/clientpositive/orc_analyze.q index 3621c7a..bd22e6f 100644 --- ql/src/test/queries/clientpositive/orc_analyze.q +++ ql/src/test/queries/clientpositive/orc_analyze.q @@ -30,8 +30,13 @@ STORED AS orc; INSERT OVERWRITE TABLE orc_create_people SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people compute statistics; +desc formatted orc_create_people; + analyze table orc_create_people compute statistics partialscan; +desc formatted orc_create_people; +analyze table orc_create_people compute statistics noscan; desc formatted orc_create_people; drop table orc_create_people; @@ -70,8 +75,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state) SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); + analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="Or"); @@ -116,8 +128,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state) SELECT * FROM orc_create_people_staging ORDER BY id; set hive.stats.autogather = true; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); + analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="Or"); +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="Or"); @@ -174,8 +193,15 @@ ALTER TABLE orc_create_people SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSer ALTER TABLE orc_create_people SET FILEFORMAT ORC; set hive.stats.autogather = true; -analyze table orc_create_people partition(state) compute statistics noscan; +analyze table orc_create_people partition(state) compute statistics; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="OH"); +analyze table orc_create_people partition(state) compute statistics partialscan; +desc formatted orc_create_people partition(state="Ca"); +desc formatted orc_create_people partition(state="OH"); + +analyze table orc_create_people partition(state) compute statistics noscan; desc formatted orc_create_people partition(state="Ca"); desc formatted orc_create_people partition(state="OH"); diff --git ql/src/test/results/clientpositive/orc_analyze.q.out ql/src/test/results/clientpositive/orc_analyze.q.out index 60a23d4..b0e2ea5 100644 --- ql/src/test/results/clientpositive/orc_analyze.q.out +++ ql/src/test/results/clientpositive/orc_analyze.q.out @@ -71,6 +71,55 @@ POSTHOOK: Lineage: orc_create_people.last_name SIMPLE [(orc_create_people_stagin POSTHOOK: Lineage: orc_create_people.salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] POSTHOOK: Lineage: orc_create_people.start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_create_people.state SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:state, type:string, comment:null), ] +PREHOOK: query: analyze table orc_create_people compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: analyze table orc_create_people compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: desc formatted orc_create_people +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp +state string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 100 + rawDataSize 52600 + totalSize 3158 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: analyze table orc_create_people compute statistics partialscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people @@ -120,6 +169,55 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: analyze table orc_create_people compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: analyze table orc_create_people compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: desc formatted orc_create_people +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp +state string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 100 + rawDataSize 52600 + totalSize 3158 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: drop table orc_create_people PREHOOK: type: DROPTABLE PREHOOK: Input: default@orc_create_people @@ -269,13 +367,13 @@ POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_ POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people PREHOOK: Output: default@orc_create_people@state=Ca PREHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people @@ -371,63 +469,18 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: drop table orc_create_people -PREHOOK: type: DROPTABLE +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people -POSTHOOK: query: drop table orc_create_people -POSTHOOK: type: DROPTABLE +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- auto stats gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -STORED AS orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: -- auto stats gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -STORED AS orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -PREHOOK: type: QUERY -PREHOOK: Input: default@orc_create_people_staging -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orc_create_people_staging POSTHOOK: Output: default@orc_create_people@state=Ca POSTHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] PREHOOK: query: desc formatted orc_create_people partition(state="Ca") PREHOOK: type: DESCTABLE PREHOOK: Input: default@orc_create_people @@ -518,78 +571,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: drop table orc_create_people -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@orc_create_people -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: drop table orc_create_people -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@orc_create_people -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- partitioned and bucketed table --- partial scan gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets -STORED AS orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: -- partitioned and bucketed table --- partial scan gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets -STORED AS orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -PREHOOK: type: QUERY -PREHOOK: Input: default@orc_create_people_staging -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orc_create_people_staging -POSTHOOK: Output: default@orc_create_people@state=Ca -POSTHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people PREHOOK: Output: default@orc_create_people@state=Ca PREHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people @@ -635,9 +623,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: desc formatted orc_create_people partition(state="Or") @@ -680,9 +668,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: drop table orc_create_people @@ -702,9 +690,6 @@ CREATE TABLE orc_create_people ( salary decimal, start_date timestamp) PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets STORED AS orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -718,9 +703,6 @@ CREATE TABLE orc_create_people ( salary decimal, start_date timestamp) PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets STORED AS orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -788,9 +770,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: desc formatted orc_create_people partition(state="Or") @@ -833,9 +815,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: drop table orc_create_people @@ -846,10 +828,534 @@ POSTHOOK: query: drop table orc_create_people POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- create table with partitions containing text and ORC files. --- ORC files implements StatsProvidingRecordReader but text files does not. --- So the partition containing text file should not have statistics. -CREATE TABLE orc_create_people ( +PREHOOK: query: -- partitioned and bucketed table +-- partial scan gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: -- partitioned and bucketed table +-- partial scan gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people_staging +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people_staging +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table orc_create_people +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: drop table orc_create_people +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: -- auto stats gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: -- auto stats gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people_staging +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people_staging +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table orc_create_people +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: drop table orc_create_people +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: -- create table with partitions containing text and ORC files. +-- ORC files implements StatsProvidingRecordReader but text files does not. +-- So the partition containing text file should not have statistics. +CREATE TABLE orc_create_people ( id int, first_name string, last_name string, @@ -946,6 +1452,214 @@ POSTHOOK: query: ALTER TABLE orc_create_people SET FILEFORMAT ORC POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=OH +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=OH +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="OH") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="OH") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [OH] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=OH +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=OH +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="OH") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="OH") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [OH] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people diff --git ql/src/test/results/clientpositive/tez/orc_analyze.q.out ql/src/test/results/clientpositive/tez/orc_analyze.q.out index 60a23d4..b0e2ea5 100644 --- ql/src/test/results/clientpositive/tez/orc_analyze.q.out +++ ql/src/test/results/clientpositive/tez/orc_analyze.q.out @@ -71,6 +71,55 @@ POSTHOOK: Lineage: orc_create_people.last_name SIMPLE [(orc_create_people_stagin POSTHOOK: Lineage: orc_create_people.salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] POSTHOOK: Lineage: orc_create_people.start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] POSTHOOK: Lineage: orc_create_people.state SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:state, type:string, comment:null), ] +PREHOOK: query: analyze table orc_create_people compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: analyze table orc_create_people compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: desc formatted orc_create_people +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp +state string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 100 + rawDataSize 52600 + totalSize 3158 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: analyze table orc_create_people compute statistics partialscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people @@ -120,6 +169,55 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: analyze table orc_create_people compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: analyze table orc_create_people compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: desc formatted orc_create_people +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp +state string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 100 + rawDataSize 52600 + totalSize 3158 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: drop table orc_create_people PREHOOK: type: DROPTABLE PREHOOK: Input: default@orc_create_people @@ -269,13 +367,13 @@ POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_ POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people PREHOOK: Output: default@orc_create_people@state=Ca PREHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people @@ -371,63 +469,18 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: drop table orc_create_people -PREHOOK: type: DROPTABLE +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people -POSTHOOK: query: drop table orc_create_people -POSTHOOK: type: DROPTABLE +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- auto stats gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -STORED AS orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: -- auto stats gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -STORED AS orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -PREHOOK: type: QUERY -PREHOOK: Input: default@orc_create_people_staging -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orc_create_people_staging POSTHOOK: Output: default@orc_create_people@state=Ca POSTHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] PREHOOK: query: desc formatted orc_create_people partition(state="Ca") PREHOOK: type: DESCTABLE PREHOOK: Input: default@orc_create_people @@ -518,78 +571,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: drop table orc_create_people -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@orc_create_people -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: drop table orc_create_people -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@orc_create_people -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- partitioned and bucketed table --- partial scan gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets -STORED AS orc -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: -- partitioned and bucketed table --- partial scan gather -CREATE TABLE orc_create_people ( - id int, - first_name string, - last_name string, - address string, - salary decimal, - start_date timestamp) -PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets -STORED AS orc -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@orc_create_people -PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -PREHOOK: type: QUERY -PREHOOK: Input: default@orc_create_people_staging -PREHOOK: Output: default@orc_create_people -POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) - SELECT * FROM orc_create_people_staging ORDER BY id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orc_create_people_staging -POSTHOOK: Output: default@orc_create_people@state=Ca -POSTHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] -POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] -PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people PREHOOK: Output: default@orc_create_people PREHOOK: Output: default@orc_create_people@state=Ca PREHOOK: Output: default@orc_create_people@state=Or -POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people @@ -635,9 +623,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: desc formatted orc_create_people partition(state="Or") @@ -680,9 +668,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: drop table orc_create_people @@ -702,9 +690,6 @@ CREATE TABLE orc_create_people ( salary decimal, start_date timestamp) PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets STORED AS orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -718,9 +703,6 @@ CREATE TABLE orc_create_people ( salary decimal, start_date timestamp) PARTITIONED BY (state string) -clustered by (first_name) -sorted by (last_name) -into 4 buckets STORED AS orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -788,9 +770,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: desc formatted orc_create_people partition(state="Or") @@ -833,9 +815,9 @@ SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [first_name] -Sort Columns: [Order(col:last_name, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: drop table orc_create_people @@ -846,10 +828,534 @@ POSTHOOK: query: drop table orc_create_people POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people -PREHOOK: query: -- create table with partitions containing text and ORC files. --- ORC files implements StatsProvidingRecordReader but text files does not. --- So the partition containing text file should not have statistics. -CREATE TABLE orc_create_people ( +PREHOOK: query: -- partitioned and bucketed table +-- partial scan gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: -- partitioned and bucketed table +-- partial scan gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people_staging +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people_staging +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table orc_create_people +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: drop table orc_create_people +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: -- auto stats gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: -- auto stats gather +CREATE TABLE orc_create_people ( + id int, + first_name string, + last_name string, + address string, + salary decimal, + start_date timestamp) +PARTITIONED BY (state string) +clustered by (first_name) +sorted by (last_name) +into 4 buckets +STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people_staging +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_people PARTITION (state) + SELECT * FROM orc_create_people_staging ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people_staging +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Ca).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).address SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:address, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).first_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).id SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).last_name SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:last_name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).salary SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_create_people PARTITION(state=Or).start_date SIMPLE [(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date, type:timestamp, comment:null), ] +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="Or") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Or") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Or] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 22050 + totalSize 2071 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 4 +Bucket Columns: [first_name] +Sort Columns: [Order(col:last_name, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table orc_create_people +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +POSTHOOK: query: drop table orc_create_people +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +PREHOOK: query: -- create table with partitions containing text and ORC files. +-- ORC files implements StatsProvidingRecordReader but text files does not. +-- So the partition containing text file should not have statistics. +CREATE TABLE orc_create_people ( id int, first_name string, last_name string, @@ -946,6 +1452,214 @@ POSTHOOK: query: ALTER TABLE orc_create_people SET FILEFORMAT ORC POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@orc_create_people POSTHOOK: Output: default@orc_create_people +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=OH +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=OH +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="OH") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="OH") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [OH] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_people +PREHOOK: Output: default@orc_create_people +PREHOOK: Output: default@orc_create_people@state=Ca +PREHOOK: Output: default@orc_create_people@state=OH +PREHOOK: Output: default@orc_create_people@state=Or +POSTHOOK: query: analyze table orc_create_people partition(state) compute statistics partialscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_people +POSTHOOK: Output: default@orc_create_people +POSTHOOK: Output: default@orc_create_people@state=Ca +POSTHOOK: Output: default@orc_create_people@state=OH +POSTHOOK: Output: default@orc_create_people@state=Or +PREHOOK: query: desc formatted orc_create_people partition(state="Ca") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="Ca") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [Ca] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 50 + rawDataSize 21950 + totalSize 2055 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_create_people partition(state="OH") +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_create_people +POSTHOOK: query: desc formatted orc_create_people partition(state="OH") +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_create_people +# col_name data_type comment + +id int +first_name string +last_name string +address string +salary decimal(10,0) +start_date timestamp + +# Partition Information +# col_name data_type comment + +state string + +# Detailed Partition Information +Partition Value: [OH] +Database: default +Table: orc_create_people +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE false + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: analyze table orc_create_people partition(state) compute statistics noscan PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_people