diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index 2ea6154..ffd5b04 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -46,7 +46,7 @@ // does not merge, the file will be put into incompatible file set and will // not be merged. CompressionKind compression = null; - long compressBuffSize = 0; + int compressBuffSize = 0; OrcFile.Version version; int columnCount = 0; int rowIndexStride = 0; @@ -108,6 +108,7 @@ private void processKeyValuePairs(Object key, Object value) outWriter = OrcFile.createWriter(outPath, OrcFile.writerOptions(jc) .compress(compression) + .bufferSize(compressBuffSize) .version(version) .rowIndexStride(rowIndexStride) .inspector(reader.getObjectInspector())); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java index a62fc1e..8eda37f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java @@ -34,7 +34,7 @@ private Path inputPath; private CompressionKind compression; - private long compressBufferSize; + private int compressBufferSize; private List types; private int rowIndexStride; private OrcFile.Version version; @@ -64,11 +64,11 @@ public void setRowIndexStride(int rowIndexStride) { this.rowIndexStride = rowIndexStride; } - public long getCompressBufferSize() { + public int getCompressBufferSize() { return compressBufferSize; } - public void setCompressBufferSize(long compressBufferSize) { + public void setCompressBufferSize(int compressBufferSize) { this.compressBufferSize = compressBufferSize; } diff --git a/ql/src/test/queries/clientpositive/orc_merge10.q b/ql/src/test/queries/clientpositive/orc_merge10.q new file mode 100644 index 0000000..e4e0ebe --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_merge10.q @@ -0,0 +1,108 @@ +set hive.explain.user=false; +set hive.merge.orcfile.stripe.level=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.optimize.sort.dynamic.partition=false; +set mapred.min.split.size=1000; +set mapred.max.split.size=2000; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=2000; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- SORT_QUERY_RESULTS + +DROP TABLE orcfile_merge1; +DROP TABLE orcfile_merge1b; +DROP TABLE orcfile_merge1c; + +CREATE TABLE orcfile_merge1 (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096"); +CREATE TABLE orcfile_merge1b (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096"); +CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096"); + +-- merge disabled +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/; + +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; +-- auto-merge slow way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/; + +set hive.merge.orcfile.stripe.level=true; +-- auto-merge fast way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/; + +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +-- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t; + +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1b WHERE ds='1' +) t; + +select count(*) from orcfile_merge1; +select count(*) from orcfile_merge1b; + +-- concatenate +explain ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE; +ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/; + +-- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t; + +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t; + +select count(*) from orcfile_merge1; +select count(*) from orcfile_merge1c; + +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump; +select * from orcfile_merge1 where ds='1' and part='0' limit 1; +select * from orcfile_merge1c where ds='1' and part='0' limit 1; + +DROP TABLE orcfile_merge1; +DROP TABLE orcfile_merge1b; +DROP TABLE orcfile_merge1c; diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out new file mode 100644 index 0000000..89d0bf6 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -0,0 +1,644 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE orcfile_merge1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +DROP TABLE orcfile_merge1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orcfile_merge1b +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orcfile_merge1b +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orcfile_merge1c +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orcfile_merge1c +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge1 +POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge1 +PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge1b +POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge1b +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge1c +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge1c +PREHOOK: query: -- merge disabled +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- merge disabled +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + part + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcfile_merge1@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1 +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: -- auto-merge slow way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- auto-merge slow way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + part + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcfile_merge1b@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0 +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1 +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: -- auto-merge fast way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- auto-merge fast way +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1c + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + part + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1c + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcfile_merge1c@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0 +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1 +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +-21975308766 +PREHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1b WHERE ds='1' +) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1b +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1b WHERE ds='1' +) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1b +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1 +#### A masked pattern was here #### +-21975308766 +PREHOOK: query: select count(*) from orcfile_merge1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from orcfile_merge1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from orcfile_merge1b +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1b +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from orcfile_merge1b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1b +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1 +#### A masked pattern was here #### +500 +PREHOOK: query: -- concatenate +explain ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE +PREHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: query: -- concatenate +explain ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE +POSTHOOK: type: ALTER_PARTITION_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 1 + part 0 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Output: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: query: ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orcfile_merge1 +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0 +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1c +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1c +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +-21975308766 +PREHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1 WHERE ds='1' +) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +-21975308766 +PREHOOK: query: select count(*) from orcfile_merge1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from orcfile_merge1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from orcfile_merge1c +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1c +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from orcfile_merge1c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1c +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select * from orcfile_merge1 where ds='1' and part='0' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0 +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with HIVE_4243 +Rows: 242 +Compression: SNAPPY +Compression size: 4096 +Type: struct + +Stripe Statistics: + Stripe 1: + Column 0: count: 152 hasNull: false + Column 1: count: 152 hasNull: false min: 0 max: 497 sum: 38034 + Column 2: count: 152 hasNull: false min: val_0 max: val_97 sum: 1034 + Stripe 2: + Column 0: count: 90 hasNull: false + Column 1: count: 90 hasNull: false min: 0 max: 495 sum: 22736 + Column 2: count: 90 hasNull: false min: val_0 max: val_86 sum: 612 + +File Statistics: + Column 0: count: 242 hasNull: false + Column 1: count: 242 hasNull: false min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 + +Stripes: + Stripe: offset: 3 data: 988 rows: 152 tail: 72 index: 77 + Stream: column 0 section ROW_INDEX start: 3 length 12 + Stream: column 1 section ROW_INDEX start: 15 length 28 + Stream: column 2 section ROW_INDEX start: 43 length 37 + Stream: column 1 section DATA start: 80 length 309 + Stream: column 2 section DATA start: 389 length 157 + Stream: column 2 section LENGTH start: 546 length 60 + Stream: column 2 section DICTIONARY_DATA start: 606 length 462 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DICTIONARY_V2[114] + Row group indices for column 1: + Entry 0: count: 152 hasNull: false min: 0 max: 497 sum: 38034 positions: 0,0,0 + Stripe: offset: 1140 data: 616 rows: 90 tail: 61 index: 76 + Stream: column 0 section ROW_INDEX start: 1140 length 11 + Stream: column 1 section ROW_INDEX start: 1151 length 27 + Stream: column 2 section ROW_INDEX start: 1178 length 38 + Stream: column 1 section DATA start: 1216 length 185 + Stream: column 2 section DATA start: 1401 length 377 + Stream: column 2 section LENGTH start: 1778 length 54 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Row group indices for column 1: + Entry 0: count: 90 hasNull: false min: 0 max: 495 sum: 22736 positions: 0,0,0 + +File length: 2137 bytes +Padding length: 0 bytes +Padding ratio: 0% +-- END ORC FILE DUMP -- +172 val_172 1 0 +PREHOOK: query: select * from orcfile_merge1c where ds='1' and part='0' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1c +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with HIVE_4243 +Rows: 242 +Compression: SNAPPY +Compression size: 4096 +Type: struct + +Stripe Statistics: + Stripe 1: + Column 0: count: 152 hasNull: false + Column 1: count: 152 hasNull: false min: 0 max: 497 sum: 38034 + Column 2: count: 152 hasNull: false min: val_0 max: val_97 sum: 1034 + Stripe 2: + Column 0: count: 90 hasNull: false + Column 1: count: 90 hasNull: false min: 0 max: 495 sum: 22736 + Column 2: count: 90 hasNull: false min: val_0 max: val_86 sum: 612 + +File Statistics: + Column 0: count: 242 hasNull: false + Column 1: count: 242 hasNull: false min: 0 max: 497 sum: 60770 + Column 2: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 + +Stripes: + Stripe: offset: 3 data: 988 rows: 152 tail: 72 index: 77 + Stream: column 0 section ROW_INDEX start: 3 length 12 + Stream: column 1 section ROW_INDEX start: 15 length 28 + Stream: column 2 section ROW_INDEX start: 43 length 37 + Stream: column 1 section DATA start: 80 length 309 + Stream: column 2 section DATA start: 389 length 157 + Stream: column 2 section LENGTH start: 546 length 60 + Stream: column 2 section DICTIONARY_DATA start: 606 length 462 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DICTIONARY_V2[114] + Row group indices for column 1: + Entry 0: count: 152 hasNull: false min: 0 max: 497 sum: 38034 positions: 0,0,0 + Stripe: offset: 1140 data: 616 rows: 90 tail: 61 index: 76 + Stream: column 0 section ROW_INDEX start: 1140 length 11 + Stream: column 1 section ROW_INDEX start: 1151 length 27 + Stream: column 2 section ROW_INDEX start: 1178 length 38 + Stream: column 1 section DATA start: 1216 length 185 + Stream: column 2 section DATA start: 1401 length 377 + Stream: column 2 section LENGTH start: 1778 length 54 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Row group indices for column 1: + Entry 0: count: 90 hasNull: false min: 0 max: 495 sum: 22736 positions: 0,0,0 + +File length: 2137 bytes +Padding length: 0 bytes +Padding ratio: 0% +-- END ORC FILE DUMP -- +172 val_172 1 0 +PREHOOK: query: DROP TABLE orcfile_merge1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orcfile_merge1 +PREHOOK: Output: default@orcfile_merge1 +PREHOOK: query: DROP TABLE orcfile_merge1b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orcfile_merge1b +PREHOOK: Output: default@orcfile_merge1b +PREHOOK: query: DROP TABLE orcfile_merge1c +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orcfile_merge1c +PREHOOK: Output: default@orcfile_merge1c