diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1cc0104..5f30473 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -212,6 +212,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ orc_merge_incompat1.q,\ orc_merge_incompat2.q,\ orc_merge_incompat3.q,\ + orc_merge_incompat_schema.q,\ + orc_merge_incompat_writer_version.q,\ orc_ppd_schema_evol_1a.q,\ orc_ppd_schema_evol_1b.q,\ orc_ppd_schema_evol_2a.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index d9547b9..46905ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -22,6 +22,7 @@ import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.io.orc.Writer; +import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataInputStream; @@ -46,14 +47,15 @@ // These parameters must match for all orc files involved in merging. If it // does not merge, the file will be put into incompatible file set and will // not be merged. - CompressionKind compression = null; - int compressBuffSize = 0; - OrcFile.Version version; - int columnCount = 0; - int rowIndexStride = 0; - - Writer outWriter; - Path prevPath; + private CompressionKind compression = null; + private int compressBuffSize = 0; + private OrcFile.Version fileVersion; + private OrcFile.WriterVersion writerVersion; + private TypeDescription fileSchema; + private int rowIndexStride = 0; + + private Writer outWriter; + private Path prevPath; private Reader reader; private FSDataInputStream fdis; @@ -110,13 +112,14 @@ private void processKeyValuePairs(Object key, Object value) if (outWriter == null) { compression = k.getCompression(); compressBuffSize = k.getCompressBufferSize(); - version = k.getVersion(); - columnCount = k.getTypes().get(0).getSubtypesCount(); + fileVersion = k.getFileVersion(); + writerVersion = k.getWriterVersion(); + fileSchema = k.getFileSchema(); rowIndexStride = k.getRowIndexStride(); OrcFile.WriterOptions options = OrcFile.writerOptions(jc) .compress(compression) - .version(version) + .version(fileVersion) .rowIndexStride(rowIndexStride) .inspector(reader.getObjectInspector()); // compression buffer size should only be set if compression is enabled @@ -185,8 +188,8 @@ private void processKeyValuePairs(Object key, Object value) private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files - if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { - LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); + if (!fileSchema.equals(k.getFileSchema())) { + LOG.warn("Incompatible ORC file merge! Schema mismatch for " + k.getInputPath()); return false; } @@ -201,8 +204,13 @@ private boolean checkCompatibility(OrcFileKeyWrapper k) { } - if (!k.getVersion().equals(version)) { - LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); + if (!k.getFileVersion().equals(fileVersion)) { + LOG.warn("Incompatible ORC file merge! File version mismatch for " + k.getInputPath()); + return false; + } + + if (!k.getWriterVersion().equals(writerVersion)) { + LOG.warn("Incompatible ORC file merge! Writer version mismatch for " + k.getInputPath()); return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java index 40f1da0..e82472b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileKeyWrapper.java @@ -21,12 +21,11 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.List; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.WritableComparable; import org.apache.orc.CompressionKind; -import org.apache.orc.OrcProto; +import org.apache.orc.TypeDescription; /** * Key for OrcFileMergeMapper task. Contains orc file related information that @@ -37,9 +36,10 @@ private Path inputPath; private CompressionKind compression; private int compressBufferSize; - private List types; + private TypeDescription fileSchema; private int rowIndexStride; - private OrcFile.Version version; + private OrcFile.Version fileVersion; + private OrcFile.WriterVersion writerVersion; private boolean isIncompatFile; public boolean isIncompatFile() { @@ -50,16 +50,24 @@ public void setIsIncompatFile(boolean isIncompatFile) { this.isIncompatFile = isIncompatFile; } - public OrcFile.Version getVersion() { - return version; + public int getRowIndexStride() { + return rowIndexStride; } - public void setVersion(OrcFile.Version version) { - this.version = version; + public OrcFile.Version getFileVersion() { + return fileVersion; } - public int getRowIndexStride() { - return rowIndexStride; + public void setFileVersion(final OrcFile.Version fileVersion) { + this.fileVersion = fileVersion; + } + + public OrcFile.WriterVersion getWriterVersion() { + return writerVersion; + } + + public void setWriterVersion(final OrcFile.WriterVersion writerVersion) { + this.writerVersion = writerVersion; } public void setRowIndexStride(int rowIndexStride) { @@ -82,12 +90,12 @@ public void setCompression(CompressionKind compression) { this.compression = compression; } - public List getTypes() { - return types; + public TypeDescription getFileSchema() { + return fileSchema; } - public void setTypes(List types) { - this.types = types; + public void setFileSchema(final TypeDescription fileSchema) { + this.fileSchema = fileSchema; } public Path getInputPath() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java index f06195f..c0fbf4f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeRecordReader.java @@ -102,9 +102,10 @@ protected boolean nextStripe(OrcFileKeyWrapper keyWrapper, OrcFileValueWrapper v keyWrapper.setInputPath(path); keyWrapper.setCompression(reader.getCompressionKind()); keyWrapper.setCompressBufferSize(reader.getCompressionSize()); - keyWrapper.setVersion(reader.getFileVersion()); + keyWrapper.setFileVersion(reader.getFileVersion()); + keyWrapper.setWriterVersion(reader.getWriterVersion()); keyWrapper.setRowIndexStride(reader.getRowIndexStride()); - keyWrapper.setTypes(reader.getTypes()); + keyWrapper.setFileSchema(reader.getSchema()); } else { stripeIdx++; continue; diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q b/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q new file mode 100644 index 0000000..098b41e --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q @@ -0,0 +1,47 @@ +set hive.metastore.disallow.incompatible.col.type.changes=false; + +CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':'; + +LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging; + +CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000"); + +INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging; +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; + +-- will be merged as the schema is the same +ALTER TABLE orc_create_complex CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; + +ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT; + +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; + +-- schema is different for both files, will not be merged +ALTER TABLE orc_create_complex CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat_writer_version.q b/ql/src/test/queries/clientpositive/orc_merge_incompat_writer_version.q new file mode 100644 index 0000000..5e188f7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_merge_incompat_writer_version.q @@ -0,0 +1,30 @@ +DROP TABLE part_orc; +CREATE TABLE part_orc( + p_partkey int, + p_name string, + p_mfgr string, + p_brand string, + p_type string, + p_size int, + p_container string, + p_retailprice double, + p_comment string +) +STORED AS ORC; + +-- writer version for this file is HIVE_13083 +LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE part_orc; + +create table part_orc_staging as select * from part_orc; + +-- will be written with current writer version +insert into table part_orc select * from part_orc_staging; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/part_orc/; +select sum(hash(*)) from part_orc; + +-- will not be merged as writer version is not matching +ALTER TABLE part_orc CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/part_orc/; +select sum(hash(*)) from part_orc; diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat_schema.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat_schema.q.out new file mode 100644 index 0000000..c11bac4 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat_schema.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct EXPRESSION [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat_writer_version.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat_writer_version.q.out new file mode 100644 index 0000000..109f7b1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat_writer_version.q.out @@ -0,0 +1,108 @@ +PREHOOK: query: DROP TABLE part_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE part_orc( + p_partkey int, + p_name string, + p_mfgr string, + p_brand string, + p_type string, + p_size int, + p_container string, + p_retailprice double, + p_comment string +) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_orc +POSTHOOK: query: CREATE TABLE part_orc( + p_partkey int, + p_name string, + p_mfgr string, + p_brand string, + p_type string, + p_size int, + p_container string, + p_retailprice double, + p_comment string +) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE part_orc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part_orc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE part_orc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part_orc +PREHOOK: query: create table part_orc_staging as select * from part_orc +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part_orc +PREHOOK: Output: database:default +PREHOOK: Output: default@part_orc_staging +POSTHOOK: query: create table part_orc_staging as select * from part_orc +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part_orc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_orc_staging +POSTHOOK: Lineage: part_orc_staging.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_comment SIMPLE [(part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_container SIMPLE [(part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_name SIMPLE [(part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_partkey SIMPLE [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_retailprice SIMPLE [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_size SIMPLE [(part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_type SIMPLE [(part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into table part_orc select * from part_orc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc_staging +PREHOOK: Output: default@part_orc +POSTHOOK: query: insert into table part_orc select * from part_orc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc_staging +POSTHOOK: Output: default@part_orc +POSTHOOK: Lineage: part_orc.p_brand SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_comment SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_container SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_mfgr SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_name SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_type, type:string, comment:null), ] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from part_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from part_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc +#### A masked pattern was here #### +26132451616 +PREHOOK: query: ALTER TABLE part_orc CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@part_orc +PREHOOK: Output: default@part_orc +POSTHOOK: query: ALTER TABLE part_orc CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@part_orc +POSTHOOK: Output: default@part_orc +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from part_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from part_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc +#### A masked pattern was here #### +26132451616 diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat_schema.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat_schema.q.out new file mode 100644 index 0000000..c11bac4 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_merge_incompat_schema.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP, + lst ARRAY, + strct STRUCT, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct EXPRESSION [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat_writer_version.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat_writer_version.q.out new file mode 100644 index 0000000..109f7b1 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_merge_incompat_writer_version.q.out @@ -0,0 +1,108 @@ +PREHOOK: query: DROP TABLE part_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE part_orc( + p_partkey int, + p_name string, + p_mfgr string, + p_brand string, + p_type string, + p_size int, + p_container string, + p_retailprice double, + p_comment string +) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_orc +POSTHOOK: query: CREATE TABLE part_orc( + p_partkey int, + p_name string, + p_mfgr string, + p_brand string, + p_type string, + p_size int, + p_container string, + p_retailprice double, + p_comment string +) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE part_orc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part_orc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE part_orc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part_orc +PREHOOK: query: create table part_orc_staging as select * from part_orc +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part_orc +PREHOOK: Output: database:default +PREHOOK: Output: default@part_orc_staging +POSTHOOK: query: create table part_orc_staging as select * from part_orc +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part_orc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_orc_staging +POSTHOOK: Lineage: part_orc_staging.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_comment SIMPLE [(part_orc)part_orc.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_container SIMPLE [(part_orc)part_orc.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_name SIMPLE [(part_orc)part_orc.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_partkey SIMPLE [(part_orc)part_orc.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_retailprice SIMPLE [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_size SIMPLE [(part_orc)part_orc.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc_staging.p_type SIMPLE [(part_orc)part_orc.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into table part_orc select * from part_orc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc_staging +PREHOOK: Output: default@part_orc +POSTHOOK: query: insert into table part_orc select * from part_orc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc_staging +POSTHOOK: Output: default@part_orc +POSTHOOK: Lineage: part_orc.p_brand SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_comment SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_container SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_mfgr SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_name SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_orc_staging)part_orc_staging.FieldSchema(name:p_type, type:string, comment:null), ] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from part_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from part_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc +#### A masked pattern was here #### +26132451616 +PREHOOK: query: ALTER TABLE part_orc CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@part_orc +PREHOOK: Output: default@part_orc +POSTHOOK: query: ALTER TABLE part_orc CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@part_orc +POSTHOOK: Output: default@part_orc +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from part_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@part_orc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from part_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_orc +#### A masked pattern was here #### +26132451616